From b4d0a09f15c60c88bbf516d2f6dcdb451dcad9c7 Mon Sep 17 00:00:00 2001
From: George Marques
Date: Mon, 22 Jan 2024 11:31:55 -0300
Subject: GDScript: Reintroduce binary tokenization on export

This adds back a function available in 3.x: exporting the GDScript
files in a binary form by converting the tokens recognized by the
tokenizer into a data format. It is enabled by default on export but
can be manually disabled.

The format helps with loading times, since the tokens are easily
reconstructed, and with hiding the source code, since recovering it
would require a specialized tool. Code comments are not stored in this
format.

The `--test` command can also include a `--use-binary-tokens` flag,
which runs the GDScript tests with the binary format instead of the
regular source code by converting them in-memory before the tests run.
---
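Note for reviewers: in the encoding implemented below, each token is written
either as a single type byte followed by the 32-bit start line (5 bytes
total) or, when it carries an identifier/constant index in its upper bits, as
a little-endian 32-bit word whose low byte has the TOKEN_BYTE_MASK flag set,
again followed by the line (8 bytes total). The standalone sketch below
mirrors that packing; the constant values and helper names are assumptions
for illustration (the real definitions live in gdscript_tokenizer_buffer.h,
which is not part of this diff), not engine code.

#include <cstdint>
#include <cstdio>
#include <vector>

// Assumed values; the real constants are defined in gdscript_tokenizer_buffer.h.
constexpr uint32_t TOKEN_BYTE_MASK = 0x80; // High bit of the first byte flags the wide form.
constexpr uint32_t TOKEN_BITS = 8;         // Low byte holds the token type.

static void put_u32(std::vector<uint8_t> &r_buf, uint32_t p_value) {
	for (int i = 0; i < 4; i++) {
		r_buf.push_back((p_value >> (8 * i)) & 0xFF); // Little-endian, like encode_uint32().
	}
}

// Hypothetical helper mirroring _token_to_binary(): a plain token costs 5 bytes
// (type byte + start line), a token carrying an index costs 8 (flagged word + line).
static void put_token(std::vector<uint8_t> &r_buf, uint32_t p_type, uint32_t p_index, bool p_has_index, uint32_t p_line) {
	if (p_has_index) {
		put_u32(r_buf, p_type | (p_index << TOKEN_BITS) | TOKEN_BYTE_MASK);
	} else {
		r_buf.push_back(p_type); // Single byte; high bit stays clear.
	}
	put_u32(r_buf, p_line);
}

int main() {
	std::vector<uint8_t> buf;
	put_token(buf, 7, 0, false, 10); // Some keyword on line 10 -> 5 bytes.
	put_token(buf, 1, 42, true, 11); // Identifier #42 on line 11 -> 8 bytes.
	printf("%zu bytes\n", buf.size()); // Prints "13 bytes".
	return 0;
}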
 modules/gdscript/gdscript_tokenizer_buffer.cpp | 457 +++++++++++++++++++++++++
 1 file changed, 457 insertions(+)
 create mode 100644 modules/gdscript/gdscript_tokenizer_buffer.cpp

diff --git a/modules/gdscript/gdscript_tokenizer_buffer.cpp b/modules/gdscript/gdscript_tokenizer_buffer.cpp
new file mode 100644
index 0000000000..5b41c411d8
--- /dev/null
+++ b/modules/gdscript/gdscript_tokenizer_buffer.cpp
@@ -0,0 +1,457 @@
+/**************************************************************************/
+/*  gdscript_tokenizer_buffer.cpp                                         */
+/**************************************************************************/
+/*                         This file is part of:                          */
+/*                             GODOT ENGINE                               */
+/*                        https://godotengine.org                         */
+/**************************************************************************/
+/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */
+/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur.                  */
+/*                                                                        */
+/* Permission is hereby granted, free of charge, to any person obtaining  */
+/* a copy of this software and associated documentation files (the        */
+/* "Software"), to deal in the Software without restriction, including    */
+/* without limitation the rights to use, copy, modify, merge, publish,    */
+/* distribute, sublicense, and/or sell copies of the Software, and to     */
+/* permit persons to whom the Software is furnished to do so, subject to  */
+/* the following conditions:                                              */
+/*                                                                        */
+/* The above copyright notice and this permission notice shall be         */
+/* included in all copies or substantial portions of the Software.        */
+/*                                                                        */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,        */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF     */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY   */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,   */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE      */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                 */
+/**************************************************************************/
+
+#include "gdscript_tokenizer_buffer.h"
+
+#include "core/io/marshalls.h"
+
+#define TOKENIZER_VERSION 100
+
+int GDScriptTokenizerBuffer::_token_to_binary(const Token &p_token, Vector<uint8_t> &r_buffer, int p_start, HashMap<StringName, uint32_t> &r_identifiers_map, HashMap<Variant, uint32_t, VariantHasher, VariantComparator> &r_constants_map) {
+	int pos = p_start;
+
+	int token_type = p_token.type & TOKEN_MASK;
+
+	switch (p_token.type) {
+		case GDScriptTokenizer::Token::ANNOTATION:
+		case GDScriptTokenizer::Token::IDENTIFIER: {
+			// Add identifier to map.
+			int identifier_pos;
+			StringName id = p_token.get_identifier();
+			if (r_identifiers_map.has(id)) {
+				identifier_pos = r_identifiers_map[id];
+			} else {
+				identifier_pos = r_identifiers_map.size();
+				r_identifiers_map[id] = identifier_pos;
+			}
+			token_type |= identifier_pos << TOKEN_BITS;
+		} break;
+		case GDScriptTokenizer::Token::ERROR:
+		case GDScriptTokenizer::Token::LITERAL: {
+			// Add literal to map.
+			int constant_pos;
+			if (r_constants_map.has(p_token.literal)) {
+				constant_pos = r_constants_map[p_token.literal];
+			} else {
+				constant_pos = r_constants_map.size();
+				r_constants_map[p_token.literal] = constant_pos;
+			}
+			token_type |= constant_pos << TOKEN_BITS;
+		} break;
+		default:
+			break;
+	}
+
+	// Encode token.
+	int token_len;
+	if (token_type & TOKEN_MASK) {
+		token_len = 8;
+		r_buffer.resize(pos + token_len);
+		encode_uint32(token_type | TOKEN_BYTE_MASK, &r_buffer.write[pos]);
+		pos += 4;
+	} else {
+		token_len = 5;
+		r_buffer.resize(pos + token_len);
+		r_buffer.write[pos] = token_type;
+		pos++;
+	}
+	encode_uint32(p_token.start_line, &r_buffer.write[pos]);
+	return token_len;
+}
+
+GDScriptTokenizer::Token GDScriptTokenizerBuffer::_binary_to_token(const uint8_t *p_buffer) {
+	Token token;
+	const uint8_t *b = p_buffer;
+
+	uint32_t token_type = decode_uint32(b);
+	token.type = (Token::Type)(token_type & TOKEN_MASK);
+	if (token_type & TOKEN_BYTE_MASK) {
+		b += 4;
+	} else {
+		b++;
+	}
+	token.start_line = decode_uint32(b);
+	token.end_line = token.start_line;
+
+	token.literal = token.get_name();
+	if (token.type == Token::CONST_NAN) {
+		token.literal = String("NAN"); // Special case since name and notation are different.
+	}
+
+	switch (token.type) {
+		case GDScriptTokenizer::Token::ANNOTATION:
+		case GDScriptTokenizer::Token::IDENTIFIER: {
+			// Get name from map.
+			int identifier_pos = token_type >> TOKEN_BITS;
+			if (unlikely(identifier_pos >= identifiers.size())) {
+				Token error;
+				error.type = Token::ERROR;
+				error.literal = "Identifier index out of bounds.";
+				return error;
+			}
+			token.literal = identifiers[identifier_pos];
+		} break;
+		case GDScriptTokenizer::Token::ERROR:
+		case GDScriptTokenizer::Token::LITERAL: {
+			// Get literal from map.
+			int constant_pos = token_type >> TOKEN_BITS;
+			if (unlikely(constant_pos >= constants.size())) {
+				Token error;
+				error.type = Token::ERROR;
+				error.literal = "Constant index out of bounds.";
+				return error;
+			}
+			token.literal = constants[constant_pos];
+		} break;
+		default:
+			break;
+	}
+
+	return token;
+}
+
+Error GDScriptTokenizerBuffer::set_code_buffer(const Vector<uint8_t> &p_buffer) {
+	const uint8_t *buf = p_buffer.ptr();
+	int total_len = p_buffer.size();
+	ERR_FAIL_COND_V(p_buffer.size() < 24 || p_buffer[0] != 'G' || p_buffer[1] != 'D' || p_buffer[2] != 'S' || p_buffer[3] != 'C', ERR_INVALID_DATA);
+
+	int version = decode_uint32(&buf[4]);
+	ERR_FAIL_COND_V_MSG(version > TOKENIZER_VERSION, ERR_INVALID_DATA, "Binary GDScript is too recent! Please use a newer engine version.");
+
+	uint32_t identifier_count = decode_uint32(&buf[8]);
+	uint32_t constant_count = decode_uint32(&buf[12]);
+	uint32_t token_line_count = decode_uint32(&buf[16]);
+	uint32_t token_count = decode_uint32(&buf[20]);
+
+	const uint8_t *b = &buf[24];
+	total_len -= 24;
+
+	identifiers.resize(identifier_count);
+	for (uint32_t i = 0; i < identifier_count; i++) {
+		uint32_t len = decode_uint32(b);
+		total_len -= 4;
+		ERR_FAIL_COND_V((len * 4u) > (uint32_t)total_len, ERR_INVALID_DATA);
+		b += 4;
+		Vector<uint32_t> cs;
+		cs.resize(len);
+		for (uint32_t j = 0; j < len; j++) {
+			uint8_t tmp[4];
+			for (uint32_t k = 0; k < 4; k++) {
+				tmp[k] = b[j * 4 + k] ^ 0xb6;
+			}
+			cs.write[j] = decode_uint32(tmp);
+		}
+
+		String s(reinterpret_cast<const char32_t *>(cs.ptr()), len);
+		b += len * 4;
+		total_len -= len * 4;
+		identifiers.write[i] = s;
+	}
+
+	constants.resize(constant_count);
+	for (uint32_t i = 0; i < constant_count; i++) {
+		Variant v;
+		int len;
+		Error err = decode_variant(v, b, total_len, &len, false);
+		if (err) {
+			return err;
+		}
+		b += len;
+		total_len -= len;
+		constants.write[i] = v;
+	}
+
+	for (uint32_t i = 0; i < token_line_count; i++) {
+		ERR_FAIL_COND_V(total_len < 8, ERR_INVALID_DATA);
+		uint32_t token_index = decode_uint32(b);
+		b += 4;
+		uint32_t line = decode_uint32(b);
+		b += 4;
+		total_len -= 8;
+		token_lines[token_index] = line;
+	}
+	for (uint32_t i = 0; i < token_line_count; i++) {
+		ERR_FAIL_COND_V(total_len < 8, ERR_INVALID_DATA);
+		uint32_t token_index = decode_uint32(b);
+		b += 4;
+		uint32_t column = decode_uint32(b);
+		b += 4;
+		total_len -= 8;
+		token_columns[token_index] = column;
+	}
+
+	tokens.resize(token_count);
+	for (uint32_t i = 0; i < token_count; i++) {
+		int token_len = 5;
+		if ((*b) & TOKEN_BYTE_MASK) {
+			token_len = 8;
+		}
+		ERR_FAIL_COND_V(total_len < token_len, ERR_INVALID_DATA);
+		Token token = _binary_to_token(b);
+		b += token_len;
+		ERR_FAIL_INDEX_V(token.type, Token::TK_MAX, ERR_INVALID_DATA);
+		tokens.write[i] = token;
+		total_len -= token_len;
+	}
+
+	ERR_FAIL_COND_V(total_len > 0, ERR_INVALID_DATA);
+
+	return OK;
+}
+
+Vector<uint8_t> GDScriptTokenizerBuffer::parse_code_string(const String &p_code) {
+	Vector<uint8_t> buf;
+
+	HashMap<StringName, uint32_t> identifier_map;
+	HashMap<Variant, uint32_t, VariantHasher, VariantComparator> constant_map;
+	Vector<uint8_t> token_buffer;
+	HashMap<uint32_t, uint32_t> token_lines;
+	HashMap<uint32_t, uint32_t> token_columns;
+
+	GDScriptTokenizerText tokenizer;
+	tokenizer.set_source_code(p_code);
+	tokenizer.set_multiline_mode(true); // Ignore whitespace tokens.
+	Token current = tokenizer.scan();
+	int token_pos = 0;
+	int last_token_line = 0;
+	int token_counter = 0;
+
+	while (current.type != Token::TK_EOF) {
+		int token_len = _token_to_binary(current, token_buffer, token_pos, identifier_map, constant_map);
+		token_pos += token_len;
+		if (token_counter > 0 && current.start_line > last_token_line) {
+			token_lines[token_counter] = current.start_line;
+			token_columns[token_counter] = current.start_column;
+		}
+		last_token_line = current.end_line;
+
+		current = tokenizer.scan();
+		token_counter++;
+	}
+
+	// Reverse maps.
+	Vector<StringName> rev_identifier_map;
+	rev_identifier_map.resize(identifier_map.size());
+	for (const KeyValue<StringName, uint32_t> &E : identifier_map) {
+		rev_identifier_map.write[E.value] = E.key;
+	}
+	Vector<Variant> rev_constant_map;
+	rev_constant_map.resize(constant_map.size());
+	for (const KeyValue<Variant, uint32_t> &E : constant_map) {
+		rev_constant_map.write[E.value] = E.key;
+	}
+	HashMap<uint32_t, uint32_t> rev_token_lines;
+	for (const KeyValue<uint32_t, uint32_t> &E : token_lines) {
+		rev_token_lines[E.value] = E.key;
+	}
+
+	// Remove continuation lines from map.
+	for (int line : tokenizer.get_continuation_lines()) {
+		if (rev_token_lines.has(line + 1)) {
+			token_lines.erase(rev_token_lines[line + 1]);
+			token_columns.erase(rev_token_lines[line + 1]);
+		}
+	}
+
+	// Save header.
+	buf.resize(24);
+	buf.write[0] = 'G';
+	buf.write[1] = 'D';
+	buf.write[2] = 'S';
+	buf.write[3] = 'C';
+	encode_uint32(TOKENIZER_VERSION, &buf.write[4]);
+	encode_uint32(identifier_map.size(), &buf.write[8]);
+	encode_uint32(constant_map.size(), &buf.write[12]);
+	encode_uint32(token_lines.size(), &buf.write[16]);
+	encode_uint32(token_counter, &buf.write[20]);
+
+	int buf_pos = 24;
+
+	// Save identifiers.
+	for (const StringName &id : rev_identifier_map) {
+		String s = id.operator String();
+		int len = s.length();
+
+		buf.resize(buf_pos + (len + 1) * 4);
+
+		encode_uint32(len, &buf.write[buf_pos]);
+		buf_pos += 4;
+
+		for (int i = 0; i < len; i++) {
+			uint8_t tmp[4];
+			encode_uint32(s[i], tmp);
+
+			for (int b = 0; b < 4; b++) {
+				buf.write[buf_pos + b] = tmp[b] ^ 0xb6;
+			}
+
+			buf_pos += 4;
+		}
+	}
+
+	// Save constants.
+	for (const Variant &v : rev_constant_map) {
+		int len;
+		// Objects cannot be constant, never encode objects.
+		Error err = encode_variant(v, nullptr, len, false);
+		ERR_FAIL_COND_V_MSG(err != OK, Vector<uint8_t>(), "Error when trying to encode Variant.");
+		buf.resize(buf_pos + len);
+		encode_variant(v, &buf.write[buf_pos], len, false);
+		buf_pos += len;
+	}
+
+	// Save lines and columns.
+	buf.resize(buf_pos + token_lines.size() * 16);
+	for (const KeyValue<uint32_t, uint32_t> &e : token_lines) {
+		encode_uint32(e.key, &buf.write[buf_pos]);
+		buf_pos += 4;
+		encode_uint32(e.value, &buf.write[buf_pos]);
+		buf_pos += 4;
+	}
+	for (const KeyValue<uint32_t, uint32_t> &e : token_columns) {
+		encode_uint32(e.key, &buf.write[buf_pos]);
+		buf_pos += 4;
+		encode_uint32(e.value, &buf.write[buf_pos]);
+		buf_pos += 4;
+	}
+
+	// Store tokens.
+	buf.append_array(token_buffer);
+
+	return buf;
+}
+
+int GDScriptTokenizerBuffer::get_cursor_line() const {
+	return 0;
+}
+
+int GDScriptTokenizerBuffer::get_cursor_column() const {
+	return 0;
+}
+
+void GDScriptTokenizerBuffer::set_cursor_position(int p_line, int p_column) {
+}
+
+void GDScriptTokenizerBuffer::set_multiline_mode(bool p_state) {
+	multiline_mode = p_state;
+}
+
+bool GDScriptTokenizerBuffer::is_past_cursor() const {
+	return false;
+}
+
+void GDScriptTokenizerBuffer::push_expression_indented_block() {
+	indent_stack_stack.push_back(indent_stack);
+}
+
+void GDScriptTokenizerBuffer::pop_expression_indented_block() {
+	ERR_FAIL_COND(indent_stack_stack.size() == 0);
+	indent_stack = indent_stack_stack.back()->get();
+	indent_stack_stack.pop_back();
+}
+
+GDScriptTokenizer::Token GDScriptTokenizerBuffer::scan() {
+	// Add final newline.
+	if (current >= tokens.size() && !last_token_was_newline) {
+		Token newline;
+		newline.type = Token::NEWLINE;
+		newline.start_line = current_line;
+		newline.end_line = current_line;
+		last_token_was_newline = true;
+		return newline;
+	}
+
+	// Resolve pending indentation change.
+	if (pending_indents > 0) {
+		pending_indents--;
+		Token indent;
+		indent.type = Token::INDENT;
+		indent.start_line = current_line;
+		indent.end_line = current_line;
+		return indent;
+	} else if (pending_indents < 0) {
+		pending_indents++;
+		Token dedent;
+		dedent.type = Token::DEDENT;
+		dedent.start_line = current_line;
+		dedent.end_line = current_line;
+		return dedent;
+	}
+
+	if (current >= tokens.size()) {
+		if (!indent_stack.is_empty()) {
+			pending_indents -= indent_stack.size();
+			indent_stack.clear();
+			return scan();
+		}
+		Token eof;
+		eof.type = Token::TK_EOF;
+		return eof;
+	};
+
+	if (!last_token_was_newline && token_lines.has(current)) {
+		current_line = token_lines[current];
+		uint32_t current_column = token_columns[current];
+
+		// Check if there's a need to indent/dedent.
+		if (!multiline_mode) {
+			uint32_t previous_indent = 0;
+			if (!indent_stack.is_empty()) {
+				previous_indent = indent_stack.back()->get();
+			}
+			if (current_column - 1 > previous_indent) {
+				pending_indents++;
+				indent_stack.push_back(current_column - 1);
+			} else {
+				while (current_column - 1 < previous_indent) {
+					pending_indents--;
+					indent_stack.pop_back();
+					if (indent_stack.is_empty()) {
+						break;
+					}
+					previous_indent = indent_stack.back()->get();
+				}
+			}
+
+			Token newline;
+			newline.type = Token::NEWLINE;
+			newline.start_line = current_line;
+			newline.end_line = current_line;
+			last_token_was_newline = true;
+
+			return newline;
+		}
+	}
+
+	last_token_was_newline = false;
+
+	Token token = tokens[current++];
+	return token;
+}
-- 
cgit v1.2.3
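A quick reference for the container this first patch produces, before the
second patch below reworks it: a 24-byte header holding the magic "GDSC",
the format version, and the identifier/constant/token-line/token counts as
little-endian uint32 fields, followed by the XOR-obfuscated identifiers, the
encoded constants, the line/column tables, and the token stream. The sketch
below illustrates only the identifier obfuscation; it is standalone
illustration code with made-up names, not engine code, though the XOR round
trip is exactly the `^ 0xb6` visible in both parse_code_string() and
set_code_buffer() above.

#include <cstdint>
#include <cstdio>
#include <cstring>

// Identifiers are stored as UTF-32 code units with every byte XOR'd with
// 0xb6. XOR is its own inverse, so one helper both encodes and decodes.
// This is obfuscation only, not encryption; the commit message itself says
// recovering the source merely "requires a specialized tool".
static void xor_obfuscate(uint8_t *p_data, size_t p_len) {
	for (size_t i = 0; i < p_len; i++) {
		p_data[i] ^= 0xb6;
	}
}

int main() {
	// One identifier character 'f' as little-endian UTF-32.
	uint8_t raw[4] = { 'f', 0x00, 0x00, 0x00 };
	xor_obfuscate(raw, 4); // As written to the buffer.
	xor_obfuscate(raw, 4); // As read back by set_code_buffer().
	uint32_t c = 0;
	memcpy(&c, raw, 4); // Mirrors decode_uint32() on a little-endian host.
	printf("round-trip: %c\n", (char)c); // Prints 'f'.
	return 0;
}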
From 72e5f8c31e9237a4581cf250aa9943a4290f4017 Mon Sep 17 00:00:00 2001
From: George Marques
Date: Fri, 26 Jan 2024 14:49:31 -0300
Subject: GDScript: Enable compression on export

Besides the regular option to export GDScript as binary tokens, this
also adds a compression option on top of it. The binary format needs
to encode some extra information, which generally makes it bigger than
the source text. This option reduces that difference by applying
Zstandard compression to the buffer.
---
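For context on the new COMPRESS_ZSTD branch below: the writer sizes the
destination for the worst case with Compression::get_max_compressed_buffer_size(),
compresses, then shrinks the buffer to the actual compressed size. The sketch
below shows the same pattern against plain libzstd instead of the engine
wrapper; the function name and the compression level are arbitrary choices
for illustration (build with -lzstd).

#include <cstdint>
#include <cstdio>
#include <vector>
#include <zstd.h>

// Compress a contents block the way the new COMPRESS_ZSTD branch does:
// size the output for the worst case, compress, then shrink to fit.
std::vector<uint8_t> zstd_pack(const std::vector<uint8_t> &contents) {
	std::vector<uint8_t> compressed(ZSTD_compressBound(contents.size()));
	size_t n = ZSTD_compress(compressed.data(), compressed.size(),
			contents.data(), contents.size(), /* level */ 3);
	if (ZSTD_isError(n)) {
		return {}; // The patch fails with ERR_FAIL_COND_V_MSG here instead.
	}
	compressed.resize(n);
	return compressed;
}

int main() {
	std::vector<uint8_t> contents(4096, 0xAB); // Highly repetitive test data.
	std::vector<uint8_t> packed = zstd_pack(contents);
	printf("%zu -> %zu bytes\n", contents.size(), packed.size());
	return 0;
}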
 modules/gdscript/gdscript_tokenizer_buffer.cpp | 106 +++++++++++++++++--------
 1 file changed, 71 insertions(+), 35 deletions(-)

diff --git a/modules/gdscript/gdscript_tokenizer_buffer.cpp b/modules/gdscript/gdscript_tokenizer_buffer.cpp
index 5b41c411d8..db523ea941 100644
--- a/modules/gdscript/gdscript_tokenizer_buffer.cpp
+++ b/modules/gdscript/gdscript_tokenizer_buffer.cpp
@@ -30,6 +30,7 @@
 
 #include "gdscript_tokenizer_buffer.h"
 
+#include "core/io/compression.h"
 #include "core/io/marshalls.h"
 
 #define TOKENIZER_VERSION 100
@@ -139,19 +140,31 @@ GDScriptTokenizer::Token GDScriptTokenizerBuffer::_binary_to_token(const uint8_t
 
 Error GDScriptTokenizerBuffer::set_code_buffer(const Vector<uint8_t> &p_buffer) {
 	const uint8_t *buf = p_buffer.ptr();
-	int total_len = p_buffer.size();
-	ERR_FAIL_COND_V(p_buffer.size() < 24 || p_buffer[0] != 'G' || p_buffer[1] != 'D' || p_buffer[2] != 'S' || p_buffer[3] != 'C', ERR_INVALID_DATA);
+	ERR_FAIL_COND_V(p_buffer.size() < 12 || p_buffer[0] != 'G' || p_buffer[1] != 'D' || p_buffer[2] != 'S' || p_buffer[3] != 'C', ERR_INVALID_DATA);
 
 	int version = decode_uint32(&buf[4]);
 	ERR_FAIL_COND_V_MSG(version > TOKENIZER_VERSION, ERR_INVALID_DATA, "Binary GDScript is too recent! Please use a newer engine version.");
 
-	uint32_t identifier_count = decode_uint32(&buf[8]);
-	uint32_t constant_count = decode_uint32(&buf[12]);
-	uint32_t token_line_count = decode_uint32(&buf[16]);
-	uint32_t token_count = decode_uint32(&buf[20]);
+	int decompressed_size = decode_uint32(&buf[8]);
 
-	const uint8_t *b = &buf[24];
-	total_len -= 24;
+	Vector<uint8_t> contents;
+	if (decompressed_size == 0) {
+		contents = p_buffer.slice(12);
+	} else {
+		contents.resize(decompressed_size);
+		int result = Compression::decompress(contents.ptrw(), contents.size(), &buf[12], p_buffer.size() - 12, Compression::MODE_ZSTD);
+		ERR_FAIL_COND_V_MSG(result != decompressed_size, ERR_INVALID_DATA, "Error decompressing GDScript tokenizer buffer.");
+	}
+
+	int total_len = contents.size();
+	buf = contents.ptr();
+	uint32_t identifier_count = decode_uint32(&buf[0]);
+	uint32_t constant_count = decode_uint32(&buf[4]);
+	uint32_t token_line_count = decode_uint32(&buf[8]);
+	uint32_t token_count = decode_uint32(&buf[16]);
+
+	const uint8_t *b = &buf[20];
+	total_len -= 20;
 
 	identifiers.resize(identifier_count);
 	for (uint32_t i = 0; i < identifier_count; i++) {
@@ -226,9 +239,7 @@ Error GDScriptTokenizerBuffer::set_code_buffer(const Vector<uint8_t> &p_buffer)
 	return OK;
 }
 
-Vector<uint8_t> GDScriptTokenizerBuffer::parse_code_string(const String &p_code) {
-	Vector<uint8_t> buf;
-
+Vector<uint8_t> GDScriptTokenizerBuffer::parse_code_string(const String &p_code, CompressMode p_compress_mode) {
 	HashMap<StringName, uint32_t> identifier_map;
 	HashMap<Variant, uint32_t, VariantHasher, VariantComparator> constant_map;
 	Vector<uint8_t> token_buffer;
@@ -280,28 +291,23 @@ Vector<uint8_t> GDScriptTokenizerBuffer::parse_code_string(const String &p_code
 		}
 	}
 
-	// Save header.
-	buf.resize(24);
-	buf.write[0] = 'G';
-	buf.write[1] = 'D';
-	buf.write[2] = 'S';
-	buf.write[3] = 'C';
-	encode_uint32(TOKENIZER_VERSION, &buf.write[4]);
-	encode_uint32(identifier_map.size(), &buf.write[8]);
-	encode_uint32(constant_map.size(), &buf.write[12]);
-	encode_uint32(token_lines.size(), &buf.write[16]);
-	encode_uint32(token_counter, &buf.write[20]);
+	Vector<uint8_t> contents;
+	contents.resize(20);
+	encode_uint32(identifier_map.size(), &contents.write[0]);
+	encode_uint32(constant_map.size(), &contents.write[4]);
+	encode_uint32(token_lines.size(), &contents.write[8]);
+	encode_uint32(token_counter, &contents.write[16]);
 
-	int buf_pos = 24;
+	int buf_pos = 20;
 
 	// Save identifiers.
 	for (const StringName &id : rev_identifier_map) {
 		String s = id.operator String();
 		int len = s.length();
 
-		buf.resize(buf_pos + (len + 1) * 4);
+		contents.resize(buf_pos + (len + 1) * 4);
 
-		encode_uint32(len, &buf.write[buf_pos]);
+		encode_uint32(len, &contents.write[buf_pos]);
 		buf_pos += 4;
 
 		for (int i = 0; i < len; i++) {
@@ -309,7 +315,7 @@ Vector<uint8_t> GDScriptTokenizerBuffer::parse_code_string(const String &p_code
 			encode_uint32(s[i], tmp);
 
 			for (int b = 0; b < 4; b++) {
-				buf.write[buf_pos + b] = tmp[b] ^ 0xb6;
+				contents.write[buf_pos + b] = tmp[b] ^ 0xb6;
 			}
 
 			buf_pos += 4;
@@ -322,28 +328,58 @@ Vector<uint8_t> GDScriptTokenizerBuffer::parse_code_string(const String &p_code
 		// Objects cannot be constant, never encode objects.
 		Error err = encode_variant(v, nullptr, len, false);
 		ERR_FAIL_COND_V_MSG(err != OK, Vector<uint8_t>(), "Error when trying to encode Variant.");
-		buf.resize(buf_pos + len);
-		encode_variant(v, &buf.write[buf_pos], len, false);
+		contents.resize(buf_pos + len);
+		encode_variant(v, &contents.write[buf_pos], len, false);
 		buf_pos += len;
 	}
 
 	// Save lines and columns.
-	buf.resize(buf_pos + token_lines.size() * 16);
+	contents.resize(buf_pos + token_lines.size() * 16);
 	for (const KeyValue<uint32_t, uint32_t> &e : token_lines) {
-		encode_uint32(e.key, &buf.write[buf_pos]);
+		encode_uint32(e.key, &contents.write[buf_pos]);
 		buf_pos += 4;
-		encode_uint32(e.value, &buf.write[buf_pos]);
+		encode_uint32(e.value, &contents.write[buf_pos]);
 		buf_pos += 4;
 	}
 	for (const KeyValue<uint32_t, uint32_t> &e : token_columns) {
-		encode_uint32(e.key, &buf.write[buf_pos]);
+		encode_uint32(e.key, &contents.write[buf_pos]);
 		buf_pos += 4;
-		encode_uint32(e.value, &buf.write[buf_pos]);
+		encode_uint32(e.value, &contents.write[buf_pos]);
 		buf_pos += 4;
 	}
 
 	// Store tokens.
-	buf.append_array(token_buffer);
+	contents.append_array(token_buffer);
+
+	Vector<uint8_t> buf;
+
+	// Save header.
+	buf.resize(12);
+	buf.write[0] = 'G';
+	buf.write[1] = 'D';
+	buf.write[2] = 'S';
+	buf.write[3] = 'C';
+	encode_uint32(TOKENIZER_VERSION, &buf.write[4]);
+
+	switch (p_compress_mode) {
+		case COMPRESS_NONE:
+			encode_uint32(0u, &buf.write[8]);
+			buf.append_array(contents);
+			break;
+
+		case COMPRESS_ZSTD: {
+			encode_uint32(contents.size(), &buf.write[8]);
+			Vector<uint8_t> compressed;
+			int max_size = Compression::get_max_compressed_buffer_size(contents.size(), Compression::MODE_ZSTD);
+			compressed.resize(max_size);
+
+			int compressed_size = Compression::compress(compressed.ptrw(), contents.ptr(), contents.size(), Compression::MODE_ZSTD);
+			ERR_FAIL_COND_V_MSG(compressed_size < 0, Vector<uint8_t>(), "Error compressing GDScript tokenizer buffer.");
+			compressed.resize(compressed_size);
+
+			buf.append_array(compressed);
+		} break;
+	}
 
 	return buf;
 }
@@ -372,7 +408,7 @@ void GDScriptTokenizerBuffer::push_expression_indented_block() {
 }
 
 void GDScriptTokenizerBuffer::pop_expression_indented_block() {
-	ERR_FAIL_COND(indent_stack_stack.size() == 0);
+	ERR_FAIL_COND(indent_stack_stack.is_empty());
 	indent_stack = indent_stack_stack.back()->get();
 	indent_stack_stack.pop_back();
 }
-- 
cgit v1.2.3
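Putting the two patches together, a reader of the final container sees a
12-byte header ("GDSC", version, and a decompressed-size field where zero
means the contents are stored uncompressed) followed by either the raw
contents block or a Zstandard frame. Below is a hedged standalone sketch of
that read path, using libzstd directly rather than Godot's Compression
wrapper, with a hypothetical function name; it also skips the version check
that set_code_buffer() performs.

#include <cstdint>
#include <cstring>
#include <vector>
#include <zstd.h>

static uint32_t read_u32(const uint8_t *p) {
	return p[0] | (p[1] << 8) | (p[2] << 16) | ((uint32_t)p[3] << 24); // Little-endian.
}

// Returns the decoded contents block, or an empty vector on error
// (a sketch-level simplification; real code would report the error).
std::vector<uint8_t> read_gdc_contents(const std::vector<uint8_t> &file) {
	if (file.size() < 12 || memcmp(file.data(), "GDSC", 4) != 0) {
		return {};
	}
	uint32_t decompressed_size = read_u32(&file[8]);
	if (decompressed_size == 0) {
		// Stored uncompressed: contents follow the header directly.
		return std::vector<uint8_t>(file.begin() + 12, file.end());
	}
	std::vector<uint8_t> out(decompressed_size);
	size_t n = ZSTD_decompress(out.data(), out.size(), file.data() + 12, file.size() - 12);
	if (ZSTD_isError(n) || n != decompressed_size) {
		return {}; // Mirrors the patch's "result != decompressed_size" check.
	}
	return out;
}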