diff --git a/.github/workflows/java-wasm-bindings.yml b/.github/workflows/java-wasm-bindings.yml index 2d6a574ee1..e3919472c3 100644 --- a/.github/workflows/java-wasm-bindings.yml +++ b/.github/workflows/java-wasm-bindings.yml @@ -15,7 +15,7 @@ on: pull_request: jobs: - build: + build-wasm: runs-on: ubuntu-latest steps: - uses: actions/checkout@v6 diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 356956ec8f..6b3888f57d 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -188,6 +188,18 @@ jobs: - name: Run Java Loader test run: PRISM_SERIALIZE_ONLY_SEMANTICS_FIELDS=1 JRUBY_OPTS="-J-ea" bundle exec rake test:java_loader + build-java-truffleruby: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v6 + - name: Set up JRuby + uses: ruby/setup-ruby@v1 + with: + ruby-version: jruby + bundler-cache: true + - name: Compile with PRISM_JAVA_BACKEND=truffleruby + run: PRISM_SERIALIZE_ONLY_SEMANTICS_FIELDS=1 PRISM_JAVA_BACKEND=truffleruby bundle exec rake compile + lex-ruby: runs-on: ubuntu-latest steps: diff --git a/java-wasm/README.md b/java-wasm/README.md index 1c9eaee85e..47aa48a774 100644 --- a/java-wasm/README.md +++ b/java-wasm/README.md @@ -5,7 +5,7 @@ This dir contains the chicory-prism artifact, a version of prism compiled to WAS Generate the templated sources: ``` -PRISM_EXCLUDE_PRETTYPRINT=1 PRISM_SERIALIZE_ONLY_SEMANTICS_FIELDS=1 PRISM_JAVA_BACKEND=jruby bundle exec rake templates +PRISM_EXCLUDE_PRETTYPRINT=1 PRISM_SERIALIZE_ONLY_SEMANTICS_FIELDS=1 bundle exec rake templates ``` Compile to WASM using WASI SDK version 25: diff --git a/java-wasm/src/test/java/org/jruby/parser/prism/WASMTest.java b/java-wasm/src/test/java/org/jruby/parser/prism/WASMTest.java index 9e2711cd6b..1056075b89 100644 --- a/java-wasm/src/test/java/org/jruby/parser/prism/WASMTest.java +++ b/java-wasm/src/test/java/org/jruby/parser/prism/WASMTest.java @@ -5,6 +5,7 @@ import org.ruby_lang.prism.ParsingOptions; import org.ruby_lang.prism.wasm.Prism; +import
java.nio.charset.StandardCharsets; import java.util.EnumSet; import static org.junit.jupiter.api.Assertions.assertEquals; @@ -41,22 +42,6 @@ public void test1() { assertTrue(pr.value.childNodes()[0].toString().contains("IntegerNode")); } - @Test - public void test1Aot() { - // The Ruby source code to be processed - var source = "1 + 1"; - - ParseResult pr = null; - try (Prism prism = new Prism()) { - pr = prism.serializeParse(packedOptions, source); - } - - assertEquals(1, pr.value.childNodes().length); - System.out.println("Nodes:"); - System.out.println(pr.value.childNodes()[0]); - assertTrue(pr.value.childNodes()[0].toString().contains("IntegerNode")); - } - @Test public void test2() { // The Ruby source code to be processed @@ -74,19 +59,19 @@ public void test2() { } @Test - public void test2Aot() { + public void testMBCIdentifier() { // The Ruby source code to be processed - var source = "puts \"h\ne\nl\nl\no\n\""; + var source = new String("hellø = \"hello\"".getBytes(StandardCharsets.UTF_8), StandardCharsets.ISO_8859_1); ParseResult pr = null; try (Prism prism = new Prism()) { pr = prism.serializeParse(packedOptions, source); } - assertEquals(1, pr.value.childNodes().length); System.out.println("Nodes:"); + System.out.println(pr); System.out.println(pr.value.childNodes()[0]); - assertTrue(pr.value.childNodes()[0].toString().contains("CallNode")); + assertTrue(pr.value.childNodes()[0].toString().contains("hell\\xc3\\xb8")); } @Test diff --git a/templates/java/org/ruby_lang/prism/Loader.java.erb b/templates/java/org/ruby_lang/prism/Loader.java.erb index 534d8401ca..6d1a77fd10 100644 --- a/templates/java/org/ruby_lang/prism/Loader.java.erb +++ b/templates/java/org/ruby_lang/prism/Loader.java.erb @@ -1,4 +1,4 @@ -<%- string_type = Prism::Template::JAVA_STRING_TYPE -%> +<%- id_type = Prism::Template::JAVA_IDENTIFIER_TYPE -%> package org.ruby_lang.prism; import java.lang.Short; @@ -19,19 +19,11 @@ public class Loader { // Overridable methods - public Charset 
getEncodingCharset(String encodingName) { - encodingName = encodingName.toLowerCase(Locale.ROOT); - if (encodingName.equals("ascii-8bit")) { - return StandardCharsets.US_ASCII; - } - return Charset.forName(encodingName); - } - - public <%= string_type %> bytesToName(byte[] bytes) { - <%- if string_type == "String" -%> - return new String(bytes, encodingCharset).intern(); + public <%= id_type %> bytesToName(byte[] bytes) { + <%- if id_type == "byte[]" -%> + return bytes; <%- else -%> - return null; // Must be implemented by subclassing Loader + throw new AbstractMethodError("Loader.bytesToName(<%= id_type %>) is not implemented"); <%- end -%> } @@ -39,17 +31,21 @@ public class Loader { private final Loader loader; private final int bufferOffset; - private final <%= string_type %>[] cache; + private final <%= id_type %>[] cache; ConstantPool(Loader loader, int bufferOffset, int length) { this.loader = loader; this.bufferOffset = bufferOffset; - cache = new <%= string_type %>[length]; + <%- if id_type == "String" -%> + cache = new <%= id_type %>[length]; + <%- else -%> + cache = new byte[length][]; + <%- end -%> } - <%= string_type %> get(ByteBuffer buffer, int oneBasedIndex) { + <%= id_type %> get(ByteBuffer buffer, int oneBasedIndex) { int index = oneBasedIndex - 1; - <%= string_type %> constant = cache[index]; + <%= id_type %> constant = cache[index]; if (constant == null) { int offset = bufferOffset + index * 8; @@ -70,9 +66,6 @@ public class Loader { private final ByteBuffer buffer; protected String encodingName; - <%- if string_type == "String" -%> - private Charset encodingCharset; - <%- end -%> private ConstantPool constantPool; private Nodes.Source source = null; @@ -100,9 +93,6 @@ public class Loader { byte[] encodingNameBytes = new byte[encodingLength]; buffer.get(encodingNameBytes); this.encodingName = new String(encodingNameBytes, StandardCharsets.US_ASCII); - <%- if string_type == "String" -%> - this.encodingCharset = 
getEncodingCharset(this.encodingName); - <%- end -%> source.setStartLine(loadVarSInt()); source.setLineOffsets(loadLineOffsets()); @@ -213,11 +203,11 @@ public class Loader { } } - private <%= string_type %> loadConstant() { + private <%= id_type %> loadConstant() { return constantPool.get(buffer, loadVarUInt()); } - private <%= string_type %> loadOptionalConstant() { + private <%= id_type %> loadOptionalConstant() { if (buffer.get(buffer.position()) != 0) { return loadConstant(); } else { @@ -226,12 +216,16 @@ public class Loader { } } - private <%= string_type %>[] loadConstants() { + private <%= id_type %>[] loadConstants() { int length = loadVarUInt(); if (length == 0) { - return Nodes.EMPTY_STRING_ARRAY; + return Nodes.EMPTY_IDENTIFIER_ARRAY; } - <%= string_type %>[] constants = new <%= string_type %>[length]; + <%- if id_type == "String" -%> + <%= id_type %>[] constants = new <%= id_type %>[length]; + <%- else -%> + <%= id_type %>[] constants = new byte[length][]; + <%- end -%> for (int i = 0; i < length; i++) { constants[i] = constantPool.get(buffer, loadVarUInt()); } @@ -395,7 +389,7 @@ public class Loader { int bufferPosition = buffer.position(); int serializedLength = buffer.getInt(); // Load everything except the body and locals, because the name, receiver, parameters are still needed for lazily defining the method - Nodes.DefNode lazyDefNode = new Nodes.DefNode(<%= base_params.join(", ") -%>, -bufferPosition, this, loadConstant(), loadOptionalNode(), (Nodes.ParametersNode) loadOptionalNode(), null, Nodes.EMPTY_STRING_ARRAY); + Nodes.DefNode lazyDefNode = new Nodes.DefNode(<%= base_params.join(", ") -%>, -bufferPosition, this, loadConstant(), loadOptionalNode(), (Nodes.ParametersNode) loadOptionalNode(), null, Nodes.EMPTY_IDENTIFIER_ARRAY); buffer.position(bufferPosition + serializedLength); // skip past the serialized DefNode return lazyDefNode; } diff --git a/templates/java/org/ruby_lang/prism/Nodes.java.erb 
b/templates/java/org/ruby_lang/prism/Nodes.java.erb index de597eea67..9ef03e99cd 100644 --- a/templates/java/org/ruby_lang/prism/Nodes.java.erb +++ b/templates/java/org/ruby_lang/prism/Nodes.java.erb @@ -1,4 +1,4 @@ -<%- string_type = Prism::Template::JAVA_STRING_TYPE -%> +<%- id_type = Prism::Template::JAVA_IDENTIFIER_TYPE -%> package org.ruby_lang.prism; import java.lang.Override; @@ -16,7 +16,7 @@ import java.util.Arrays; // @formatter:off public abstract class Nodes { - public static final <%= string_type %>[] EMPTY_STRING_ARRAY = {}; + public static final <%= id_type %>[] EMPTY_IDENTIFIER_ARRAY = {}; @Target(ElementType.FIELD) @Retention(RetentionPolicy.SOURCE) @@ -139,6 +139,22 @@ public abstract class Nodes { protected abstract String toString(String indent); } + + protected static String asString(Object value) { + return value.toString(); + } + + protected static String asString(byte[] value) { + StringBuilder buf = new StringBuilder(value.length); + for (byte b : value) { + if (b >= 0x20 && b <= 0x7e) { + buf.append((char) b); + } else { + buf.append(String.format("\\x%02x", Byte.toUnsignedInt(b))); + } + } + return buf.toString(); + } <%-# FLAGS -%> <%- flags.each do |flag| -%> @@ -373,18 +389,18 @@ public abstract class Nodes { builder.append(nextNextIndent).append(child.toString(nextNextIndent)); } <%- when Prism::Template::StringField -%> - builder.append('"' + new String(this.<%= field.name %>, StandardCharsets.UTF_8) + '"'); + builder.append('"' + asString(this.<%= field.name %>) + '"'); builder.append('\n'); <%- when Prism::Template::ConstantField -%> - builder.append('"').append(this.<%= field.name %>).append('"'); + builder.append('"').append(asString(this.<%= field.name %>)).append('"'); builder.append('\n'); <%- when Prism::Template::OptionalConstantField -%> - builder.append(this.<%= field.name %> == null ? "null" : "\"" + this.<%= field.name %> + "\""); + builder.append(this.<%= field.name %> == null ? 
"null" : "\"" + asString(this.<%= field.name %>) + "\""); builder.append('\n'); <%- when Prism::Template::ConstantListField -%> builder.append('\n'); - for (<%= string_type %> constant : this.<%= field.name %>) { - builder.append(nextNextIndent).append('"').append(constant).append('"').append('\n'); + for (<%= id_type %> constant : this.<%= field.name %>) { + builder.append(nextNextIndent).append('"').append(asString(constant)).append('"').append('\n'); } <%- when Prism::Template::Flags -%> builder.append(flags); diff --git a/templates/template.rb b/templates/template.rb index 8f7734dd43..5d1afc9506 100755 --- a/templates/template.rb +++ b/templates/template.rb @@ -11,8 +11,8 @@ module Template # :nodoc: all REMOVE_ON_ERROR_TYPES = SERIALIZE_ONLY_SEMANTICS_FIELDS CHECK_FIELD_KIND = ENV.fetch("CHECK_FIELD_KIND", false) - JAVA_BACKEND = ENV["PRISM_JAVA_BACKEND"] || "truffleruby" - JAVA_STRING_TYPE = JAVA_BACKEND == "jruby" ? "org.jruby.RubySymbol" : "String" + JAVA_BACKEND = ENV["PRISM_JAVA_BACKEND"] || "default" + JAVA_IDENTIFIER_TYPE = JAVA_BACKEND == "truffleruby" ? "String" : "byte[]" INCLUDE_NODE_ID = !SERIALIZE_ONLY_SEMANTICS_FIELDS || JAVA_BACKEND == "jruby" COMMON_FLAGS_COUNT = 2 @@ -272,7 +272,7 @@ def call_seq_type end def java_type - JAVA_STRING_TYPE + JAVA_IDENTIFIER_TYPE end end @@ -292,7 +292,7 @@ def call_seq_type end def java_type - JAVA_STRING_TYPE + JAVA_IDENTIFIER_TYPE end end @@ -312,7 +312,7 @@ def call_seq_type end def java_type - "#{JAVA_STRING_TYPE}[]" + "#{JAVA_IDENTIFIER_TYPE}[]" end end