From b4d0a09f15c60c88bbf516d2f6dcdb451dcad9c7 Mon Sep 17 00:00:00 2001
From: George Marques <george@gmarqu.es>
Date: Mon, 22 Jan 2024 11:31:55 -0300
Subject: GDScript: Reintroduce binary tokenization on export

This adds back a feature available in 3.x: exporting the GDScript
files in a binary form by converting the tokens recognized by the
tokenizer into a data format.

It is enabled by default on export but can be manually disabled. The
format helps with loading times, since the tokens are easily
reconstructed, and with hiding the source code, since recovering it
would require a specialized tool. Code comments are not stored in this
format.

The `--test` command can also include a `--use-binary-tokens` flag
which will run the GDScript tests with the binary format instead of the
regular source code by converting them in-memory before the test runs.
---
 modules/gdscript/tests/test_gdscript.cpp | 61 ++++++++++++++++++++++++++++++--
 1 file changed, 58 insertions(+), 3 deletions(-)

(limited to 'modules/gdscript/tests/test_gdscript.cpp')
diff --git a/modules/gdscript/tests/test_gdscript.cpp b/modules/gdscript/tests/test_gdscript.cpp
index 467bedc4b2..e4fab68e06 100644
--- a/modules/gdscript/tests/test_gdscript.cpp
+++ b/modules/gdscript/tests/test_gdscript.cpp
@@ -34,6 +34,7 @@
 #include "../gdscript_compiler.h"
 #include "../gdscript_parser.h"
 #include "../gdscript_tokenizer.h"
+#include "../gdscript_tokenizer_buffer.h"
 
 #include "core/config/project_settings.h"
 #include "core/io/file_access.h"
@@ -50,7 +51,7 @@
 namespace GDScriptTests {
 
 static void test_tokenizer(const String &p_code, const Vector<String> &p_lines) {
-	GDScriptTokenizer tokenizer;
+	GDScriptTokenizerText tokenizer;
 	tokenizer.set_source_code(p_code);
 
 	int tab_size = 4;
@@ -107,6 +108,53 @@ static void test_tokenizer(const String &p_code, const Vector<String> &p_lines)
 	print_line(current.get_name()); // Should be EOF
 }
 
+static void test_tokenizer_buffer(const Vector<uint8_t> &p_buffer, const Vector<String> &p_lines);
+
+static void test_tokenizer_buffer(const String &p_code, const Vector<String> &p_lines) {
+	Vector<uint8_t> binary = GDScriptTokenizerBuffer::parse_code_string(p_code);
+	test_tokenizer_buffer(binary, p_lines);
+}
+
+static void test_tokenizer_buffer(const Vector<uint8_t> &p_buffer, const Vector<String> &p_lines) {
+	GDScriptTokenizerBuffer tokenizer;
+	tokenizer.set_code_buffer(p_buffer);
+
+	int tab_size = 4;
+#ifdef TOOLS_ENABLED
+	if (EditorSettings::get_singleton()) {
+		tab_size = EditorSettings::get_singleton()->get_setting("text_editor/behavior/indent/size");
+	}
+#endif // TOOLS_ENABLED
+	String tab = String(" ").repeat(tab_size);
+
+	GDScriptTokenizer::Token current = tokenizer.scan();
+	while (current.type != GDScriptTokenizer::Token::TK_EOF) {
+		StringBuilder token;
+		token += " --> "; // Padding for line number.
+
+		for (int l = current.start_line; l <= current.end_line && l <= p_lines.size(); l++) {
+			print_line(vformat("%04d %s", l, p_lines[l - 1]).replace("\t", tab));
+		}
+
+		token += current.get_name();
+
+		if (current.type == GDScriptTokenizer::Token::ERROR || current.type == GDScriptTokenizer::Token::LITERAL || current.type == GDScriptTokenizer::Token::IDENTIFIER || current.type == GDScriptTokenizer::Token::ANNOTATION) {
+			token += "(";
+			token += Variant::get_type_name(current.literal.get_type());
+			token += ") ";
+			token += current.literal;
+		}
+
+		print_line(token.as_string());
+
+		print_line("-------------------------------------------------------");
+
+		current = tokenizer.scan();
+	}
+
+	print_line(current.get_name()); // Should be EOF
+}
+
 static void test_parser(const String &p_code, const String &p_script_path, const Vector<String> &p_lines) {
 	GDScriptParser parser;
 	Error err = parser.parse(p_code, p_script_path, false);
@@ -119,7 +167,7 @@ static void test_parser(const String &p_code, const String &p_script_path, const
 	}
 
 	GDScriptAnalyzer analyzer(&parser);
-	analyzer.analyze();
+	err = analyzer.analyze();
 
 	if (err != OK) {
 		const List<GDScriptParser::ParserError> &errors = parser.get_errors();
@@ -212,7 +260,7 @@ void test(TestType p_type) {
 	}
 
 	String test = cmdlargs.back()->get();
-	if (!test.ends_with(".gd")) {
+	if (!test.ends_with(".gd") && !test.ends_with(".gdc")) {
 		print_line("This test expects a path to a GDScript file as its last parameter. Got: " + test);
 		return;
 	}
@@ -255,6 +303,13 @@ void test(TestType p_type) {
 		case TEST_TOKENIZER:
 			test_tokenizer(code, lines);
 			break;
+		case TEST_TOKENIZER_BUFFER:
+			if (test.ends_with(".gdc")) {
+				test_tokenizer_buffer(buf, lines);
+			} else {
+				test_tokenizer_buffer(code, lines);
+			}
+			break;
 		case TEST_PARSER:
 			test_parser(code, test, lines);
 			break;
-- 
cgit v1.2.3


From 72e5f8c31e9237a4581cf250aa9943a4290f4017 Mon Sep 17 00:00:00 2001
From: George Marques <george@gmarqu.es>
Date: Fri, 26 Jan 2024 14:49:31 -0300
Subject: GDScript: Enable compression on export

Besides the regular option to export GDScript as binary tokens, this
also includes a compression option on top of it. The binary format
needs to encode some information which generally makes it bigger than
the source text. This option reduces that difference by using Zstandard
compression on the buffer.
---
 modules/gdscript/tests/test_gdscript.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'modules/gdscript/tests/test_gdscript.cpp')

diff --git a/modules/gdscript/tests/test_gdscript.cpp b/modules/gdscript/tests/test_gdscript.cpp
index e4fab68e06..f6965cf7cf 100644
--- a/modules/gdscript/tests/test_gdscript.cpp
+++ b/modules/gdscript/tests/test_gdscript.cpp
@@ -111,7 +111,7 @@ static void test_tokenizer(const String &p_code, const Vector<String> &p_lines)
 static void test_tokenizer_buffer(const Vector<uint8_t> &p_buffer, const Vector<String> &p_lines);
 
 static void test_tokenizer_buffer(const String &p_code, const Vector<String> &p_lines) {
-	Vector<uint8_t> binary = GDScriptTokenizerBuffer::parse_code_string(p_code);
+	Vector<uint8_t> binary = GDScriptTokenizerBuffer::parse_code_string(p_code, GDScriptTokenizerBuffer::COMPRESS_NONE);
 	test_tokenizer_buffer(binary, p_lines);
 }
 
-- 
cgit v1.2.3