forked from Qortal/qortal
Merge pull request #75 from catbref/name-unicode
Unicode / NAME updates.
This commit is contained in:
commit
39c06d8817
15
pom.xml
15
pom.xml
@ -21,6 +21,8 @@
|
|||||||
<dagger.version>1.2.2</dagger.version>
|
<dagger.version>1.2.2</dagger.version>
|
||||||
<guava.version>28.1-jre</guava.version>
|
<guava.version>28.1-jre</guava.version>
|
||||||
<hsqldb.version>2.5.1</hsqldb.version>
|
<hsqldb.version>2.5.1</hsqldb.version>
|
||||||
|
<homoglyph.version>1.2.1</homoglyph.version>
|
||||||
|
<icu4j.version>70.1</icu4j.version>
|
||||||
<upnp.version>1.1</upnp.version>
|
<upnp.version>1.1</upnp.version>
|
||||||
<jersey.version>2.29.1</jersey.version>
|
<jersey.version>2.29.1</jersey.version>
|
||||||
<jetty.version>9.4.29.v20200521</jetty.version>
|
<jetty.version>9.4.29.v20200521</jetty.version>
|
||||||
@ -568,7 +570,18 @@
|
|||||||
<dependency>
|
<dependency>
|
||||||
<groupId>net.codebox</groupId>
|
<groupId>net.codebox</groupId>
|
||||||
<artifactId>homoglyph</artifactId>
|
<artifactId>homoglyph</artifactId>
|
||||||
<version>1.2.0</version>
|
<version>${homoglyph.version}</version>
|
||||||
|
</dependency>
|
||||||
|
<!-- Unicode support -->
|
||||||
|
<dependency>
|
||||||
|
<groupId>com.ibm.icu</groupId>
|
||||||
|
<artifactId>icu4j</artifactId>
|
||||||
|
<version>${icu4j.version}</version>
|
||||||
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>com.ibm.icu</groupId>
|
||||||
|
<artifactId>icu4j-charset</artifactId>
|
||||||
|
<version>${icu4j.version}</version>
|
||||||
</dependency>
|
</dependency>
|
||||||
<!-- Jetty -->
|
<!-- Jetty -->
|
||||||
<dependency>
|
<dependency>
|
||||||
|
@ -48,6 +48,7 @@ public class UpdateNameTransactionData extends TransactionData {
|
|||||||
|
|
||||||
public void afterUnmarshal(Unmarshaller u, Object parent) {
|
public void afterUnmarshal(Unmarshaller u, Object parent) {
|
||||||
this.creatorPublicKey = this.ownerPublicKey;
|
this.creatorPublicKey = this.ownerPublicKey;
|
||||||
|
this.reducedNewName = this.newName != null ? Unicode.sanitize(this.newName) : null;
|
||||||
}
|
}
|
||||||
|
|
||||||
/** From repository */
|
/** From repository */
|
||||||
@ -62,7 +63,7 @@ public class UpdateNameTransactionData extends TransactionData {
|
|||||||
this.nameReference = nameReference;
|
this.nameReference = nameReference;
|
||||||
}
|
}
|
||||||
|
|
||||||
/** From network/API */
|
/** From network */
|
||||||
public UpdateNameTransactionData(BaseTransactionData baseTransactionData, String name, String newName, String newData) {
|
public UpdateNameTransactionData(BaseTransactionData baseTransactionData, String name, String newName, String newData) {
|
||||||
this(baseTransactionData, name, newName, newData, Unicode.sanitize(newName), null);
|
this(baseTransactionData, name, newName, newData, Unicode.sanitize(newName), null);
|
||||||
}
|
}
|
||||||
|
@ -118,10 +118,13 @@ public class UpdateNameTransaction extends Transaction {
|
|||||||
if (!owner.getAddress().equals(nameData.getOwner()))
|
if (!owner.getAddress().equals(nameData.getOwner()))
|
||||||
return ValidationResult.INVALID_NAME_OWNER;
|
return ValidationResult.INVALID_NAME_OWNER;
|
||||||
|
|
||||||
// Check new name isn't already taken, unless it is the same name (this allows for case-adjusting renames)
|
// Additional checks if transaction intends to change name
|
||||||
NameData newNameData = this.repository.getNameRepository().fromReducedName(this.updateNameTransactionData.getReducedNewName());
|
if (!this.updateNameTransactionData.getNewName().isEmpty()) {
|
||||||
if (newNameData != null && !newNameData.getName().equals(nameData.getName()))
|
// Check new name isn't already taken, unless it is the same name (this allows for case-adjusting renames)
|
||||||
return ValidationResult.NAME_ALREADY_REGISTERED;
|
NameData newNameData = this.repository.getNameRepository().fromReducedName(this.updateNameTransactionData.getReducedNewName());
|
||||||
|
if (newNameData != null && !newNameData.getName().equals(nameData.getName()))
|
||||||
|
return ValidationResult.NAME_ALREADY_REGISTERED;
|
||||||
|
}
|
||||||
|
|
||||||
return ValidationResult.OK;
|
return ValidationResult.OK;
|
||||||
}
|
}
|
||||||
|
@ -18,6 +18,9 @@ import java.util.TreeMap;
|
|||||||
|
|
||||||
import com.google.common.base.CharMatcher;
|
import com.google.common.base.CharMatcher;
|
||||||
|
|
||||||
|
import com.ibm.icu.text.CaseMap;
|
||||||
|
import com.ibm.icu.text.Normalizer2;
|
||||||
|
import com.ibm.icu.text.UnicodeSet;
|
||||||
import net.codebox.homoglyph.HomoglyphBuilder;
|
import net.codebox.homoglyph.HomoglyphBuilder;
|
||||||
|
|
||||||
public abstract class Unicode {
|
public abstract class Unicode {
|
||||||
@ -31,6 +34,8 @@ public abstract class Unicode {
|
|||||||
public static final String ZERO_WIDTH_NO_BREAK_SPACE = "\ufeff";
|
public static final String ZERO_WIDTH_NO_BREAK_SPACE = "\ufeff";
|
||||||
|
|
||||||
public static final CharMatcher ZERO_WIDTH_CHAR_MATCHER = CharMatcher.anyOf(ZERO_WIDTH_SPACE + ZERO_WIDTH_NON_JOINER + ZERO_WIDTH_JOINER + WORD_JOINER + ZERO_WIDTH_NO_BREAK_SPACE);
|
public static final CharMatcher ZERO_WIDTH_CHAR_MATCHER = CharMatcher.anyOf(ZERO_WIDTH_SPACE + ZERO_WIDTH_NON_JOINER + ZERO_WIDTH_JOINER + WORD_JOINER + ZERO_WIDTH_NO_BREAK_SPACE);
|
||||||
|
private static final UnicodeSet removableUniset = new UnicodeSet("[[:Mark:][:Other:]]").freeze();
|
||||||
|
|
||||||
|
|
||||||
private static int[] homoglyphCodePoints;
|
private static int[] homoglyphCodePoints;
|
||||||
private static int[] reducedCodePoints;
|
private static int[] reducedCodePoints;
|
||||||
@ -59,7 +64,7 @@ public abstract class Unicode {
|
|||||||
public static String normalize(String input) {
|
public static String normalize(String input) {
|
||||||
String output;
|
String output;
|
||||||
|
|
||||||
// Normalize
|
// Normalize using NFKC to recompose in canonical form
|
||||||
output = Normalizer.normalize(input, Form.NFKC);
|
output = Normalizer.normalize(input, Form.NFKC);
|
||||||
|
|
||||||
// Remove zero-width code-points, used for rendering
|
// Remove zero-width code-points, used for rendering
|
||||||
@ -91,8 +96,8 @@ public abstract class Unicode {
|
|||||||
public static String sanitize(String input) {
|
public static String sanitize(String input) {
|
||||||
String output;
|
String output;
|
||||||
|
|
||||||
// Normalize
|
// Normalize using NFKD to decompose into individual combining code points
|
||||||
output = Normalizer.normalize(input, Form.NFKD);
|
output = Normalizer2.getNFKDInstance().normalize(input);
|
||||||
|
|
||||||
// Remove zero-width code-points, used for rendering
|
// Remove zero-width code-points, used for rendering
|
||||||
output = removeZeroWidth(output);
|
output = removeZeroWidth(output);
|
||||||
@ -100,11 +105,11 @@ public abstract class Unicode {
|
|||||||
// Normalize whitespace
|
// Normalize whitespace
|
||||||
output = CharMatcher.whitespace().trimAndCollapseFrom(output, ' ');
|
output = CharMatcher.whitespace().trimAndCollapseFrom(output, ' ');
|
||||||
|
|
||||||
// Remove accents, combining marks
|
// Remove accents, combining marks - see https://www.unicode.org/reports/tr44/#GC_Values_Table
|
||||||
output = output.replaceAll("[\\p{M}\\p{C}]", "");
|
output = removableUniset.stripFrom(output, true);
|
||||||
|
|
||||||
// Convert to lowercase
|
// Convert to lowercase
|
||||||
output = output.toLowerCase(Locale.ROOT);
|
output = CaseMap.toLower().apply(Locale.ROOT, output);
|
||||||
|
|
||||||
// Reduce homoglyphs
|
// Reduce homoglyphs
|
||||||
output = reduceHomoglyphs(output);
|
output = reduceHomoglyphs(output);
|
||||||
|
@ -35,4 +35,41 @@ public class UnicodeTests {
|
|||||||
assertEquals("strings should match", Unicode.sanitize(input1), Unicode.sanitize(input2));
|
assertEquals("strings should match", Unicode.sanitize(input1), Unicode.sanitize(input2));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testEmojis() {
|
||||||
|
/*
|
||||||
|
* Emojis shouldn't reduce down to empty strings.
|
||||||
|
*
|
||||||
|
* 🥳 Face with Party Horn and Party Hat Emoji U+1F973
|
||||||
|
*/
|
||||||
|
String emojis = "\uD83E\uDD73";
|
||||||
|
|
||||||
|
assertFalse(Unicode.sanitize(emojis).isBlank());
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testSanitize() {
|
||||||
|
/*
|
||||||
|
* Check various code points that should be stripped out when sanitizing / reducing
|
||||||
|
*/
|
||||||
|
String enclosingCombiningMark = "\u1100\u1161\u20DD"; // \u20DD is an enclosing combining mark and should be removed
|
||||||
|
String spacingMark = "\u0A39\u0A3f"; // \u0A3f is spacing combining mark and should be removed
|
||||||
|
String nonspacingMark = "c\u0302"; // \u0302 is a non-spacing combining mark and should be removed
|
||||||
|
|
||||||
|
assertNotSame(enclosingCombiningMark, Unicode.sanitize(enclosingCombiningMark));
|
||||||
|
assertNotSame(spacingMark, Unicode.sanitize(spacingMark));
|
||||||
|
assertNotSame(nonspacingMark, Unicode.sanitize(nonspacingMark));
|
||||||
|
|
||||||
|
String control = "\u001B\u009E"; // \u001B and \u009E are a control codes
|
||||||
|
String format = "\u202A\u2062"; // \u202A and \u2062 are zero-width formatting codes
|
||||||
|
String surrogate = "\uD800\uDFFF"; // surrogates
|
||||||
|
String privateUse = "\uE1E0"; // \uE000 - \uF8FF is private use area
|
||||||
|
String unassigned = "\uFAFA"; // \uFAFA is currently unassigned
|
||||||
|
|
||||||
|
assertTrue(Unicode.sanitize(control).isBlank());
|
||||||
|
assertTrue(Unicode.sanitize(format).isBlank());
|
||||||
|
assertTrue(Unicode.sanitize(surrogate).isBlank());
|
||||||
|
assertTrue(Unicode.sanitize(privateUse).isBlank());
|
||||||
|
assertTrue(Unicode.sanitize(unassigned).isBlank());
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
@ -1,20 +1,26 @@
|
|||||||
package org.qortal.test.naming;
|
package org.qortal.test.naming;
|
||||||
|
|
||||||
import org.junit.Before;
|
import org.junit.Before;
|
||||||
|
import org.junit.Ignore;
|
||||||
import org.junit.Test;
|
import org.junit.Test;
|
||||||
import org.qortal.account.PrivateKeyAccount;
|
import org.qortal.account.PrivateKeyAccount;
|
||||||
import org.qortal.controller.repository.NamesDatabaseIntegrityCheck;
|
import org.qortal.controller.repository.NamesDatabaseIntegrityCheck;
|
||||||
|
import org.qortal.data.naming.NameData;
|
||||||
import org.qortal.data.transaction.*;
|
import org.qortal.data.transaction.*;
|
||||||
import org.qortal.repository.DataException;
|
import org.qortal.repository.DataException;
|
||||||
import org.qortal.repository.Repository;
|
import org.qortal.repository.Repository;
|
||||||
|
import org.qortal.repository.RepositoryFactory;
|
||||||
import org.qortal.repository.RepositoryManager;
|
import org.qortal.repository.RepositoryManager;
|
||||||
|
import org.qortal.repository.hsqldb.HSQLDBRepositoryFactory;
|
||||||
|
import org.qortal.settings.Settings;
|
||||||
import org.qortal.test.common.Common;
|
import org.qortal.test.common.Common;
|
||||||
import org.qortal.test.common.TransactionUtils;
|
import org.qortal.test.common.TransactionUtils;
|
||||||
import org.qortal.test.common.transaction.TestTransaction;
|
import org.qortal.test.common.transaction.TestTransaction;
|
||||||
import org.qortal.transaction.RegisterNameTransaction;
|
import org.qortal.transaction.RegisterNameTransaction;
|
||||||
import org.qortal.transaction.Transaction;
|
import org.qortal.transaction.Transaction;
|
||||||
import org.qortal.utils.NTP;
|
import org.qortal.utils.Unicode;
|
||||||
|
|
||||||
|
import java.io.File;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
||||||
import static org.junit.Assert.*;
|
import static org.junit.Assert.*;
|
||||||
@ -50,34 +56,6 @@ public class IntegrityTests extends Common {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
|
||||||
public void testBlankReducedName() throws DataException {
|
|
||||||
try (final Repository repository = RepositoryManager.getRepository()) {
|
|
||||||
// Register-name
|
|
||||||
PrivateKeyAccount alice = Common.getTestAccount(repository, "alice");
|
|
||||||
String name = "\uD83E\uDD73"; // Translates to a reducedName of ""
|
|
||||||
String data = "\uD83E\uDD73";
|
|
||||||
|
|
||||||
RegisterNameTransactionData transactionData = new RegisterNameTransactionData(TestTransaction.generateBase(alice), name, data);
|
|
||||||
transactionData.setFee(new RegisterNameTransaction(null, null).getUnitFee(transactionData.getTimestamp()));
|
|
||||||
TransactionUtils.signAndMint(repository, transactionData, alice);
|
|
||||||
|
|
||||||
// Ensure the name exists and the data is correct
|
|
||||||
assertEquals(data, repository.getNameRepository().fromName(name).getData());
|
|
||||||
|
|
||||||
// Ensure the reducedName is blank
|
|
||||||
assertEquals("", repository.getNameRepository().fromName(name).getReducedName());
|
|
||||||
|
|
||||||
// Run the database integrity check for this name
|
|
||||||
NamesDatabaseIntegrityCheck integrityCheck = new NamesDatabaseIntegrityCheck();
|
|
||||||
assertEquals(1, integrityCheck.rebuildName(name, repository));
|
|
||||||
|
|
||||||
// Ensure the name still exists and the data is still correct
|
|
||||||
assertEquals(data, repository.getNameRepository().fromName(name).getData());
|
|
||||||
assertEquals("", repository.getNameRepository().fromName(name).getReducedName());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testUpdateWithBlankNewName() throws DataException {
|
public void testUpdateWithBlankNewName() throws DataException {
|
||||||
try (final Repository repository = RepositoryManager.getRepository()) {
|
try (final Repository repository = RepositoryManager.getRepository()) {
|
||||||
@ -448,4 +426,46 @@ public class IntegrityTests extends Common {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Ignore("Checks 'live' repository")
|
||||||
|
@Test
|
||||||
|
public void testRepository() throws DataException {
|
||||||
|
Settings.fileInstance("settings.json"); // use 'live' settings
|
||||||
|
|
||||||
|
String repositoryUrlTemplate = "jdbc:hsqldb:file:%s" + File.separator + "blockchain;create=false;hsqldb.full_log_replay=true";
|
||||||
|
String connectionUrl = String.format(repositoryUrlTemplate, Settings.getInstance().getRepositoryPath());
|
||||||
|
RepositoryFactory repositoryFactory = new HSQLDBRepositoryFactory(connectionUrl);
|
||||||
|
RepositoryManager.setRepositoryFactory(repositoryFactory);
|
||||||
|
|
||||||
|
try (final Repository repository = RepositoryManager.getRepository()) {
|
||||||
|
List<NameData> names = repository.getNameRepository().getAllNames();
|
||||||
|
|
||||||
|
for (NameData nameData : names) {
|
||||||
|
String reReduced = Unicode.sanitize(nameData.getName());
|
||||||
|
|
||||||
|
if (reReduced.isBlank()) {
|
||||||
|
System.err.println(String.format("Name '%s' reduced to blank",
|
||||||
|
nameData.getName()
|
||||||
|
));
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!nameData.getReducedName().equals(reReduced)) {
|
||||||
|
System.out.println(String.format("Name '%s' reduced form was '%s' but is now '%s'",
|
||||||
|
nameData.getName(),
|
||||||
|
nameData.getReducedName(),
|
||||||
|
reReduced
|
||||||
|
));
|
||||||
|
|
||||||
|
// ...but does another name already have this reduced form?
|
||||||
|
names.stream()
|
||||||
|
.filter(tmpNameData -> tmpNameData.getReducedName().equals(reReduced))
|
||||||
|
.forEach(tmpNameData ->
|
||||||
|
System.err.println(String.format("Name '%s' new reduced form also matches name '%s'",
|
||||||
|
nameData.getName(),
|
||||||
|
tmpNameData.getName()
|
||||||
|
))
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user