From 0315b3a5e6898bb504ceb60fb6474ed90a31105d Mon Sep 17 00:00:00 2001 From: Mike Hearn Date: Thu, 15 Sep 2011 16:11:32 +0000 Subject: [PATCH] Add a create method to Sha256Hash. Don't deserialize block or tx messages that were already seen, to avoid wasting battery and causing memory spikes that can trigger OOM conditions. Updates issue 73. --- .../bitcoin/core/BitcoinSerializer.java | 77 ++++++++++++++++--- .../bitcoin/core/NetworkConnection.java | 30 ++++++-- src/com/google/bitcoin/core/Peer.java | 8 +- src/com/google/bitcoin/core/Sha256Hash.java | 12 +++ .../bitcoin/core/BitcoinSerializerTest.java | 50 ++++++++++-- 5 files changed, 151 insertions(+), 26 deletions(-) diff --git a/src/com/google/bitcoin/core/BitcoinSerializer.java b/src/com/google/bitcoin/core/BitcoinSerializer.java index a94139b4..472d0183 100644 --- a/src/com/google/bitcoin/core/BitcoinSerializer.java +++ b/src/com/google/bitcoin/core/BitcoinSerializer.java @@ -25,21 +25,25 @@ import java.io.InputStream; import java.io.OutputStream; import java.io.UnsupportedEncodingException; import java.util.HashMap; +import java.util.LinkedHashMap; import java.util.Map; import static com.google.bitcoin.core.Utils.*; /** - * Methods to serialize and de-serialize messages to the bitcoin network format as defined in the bitcoin protocol - * specification at https://en.bitcoin.it/wiki/Protocol_specification + * Methods to serialize and de-serialize messages to the bitcoin network format as defined in + * the bitcoin protocol specification.

* * To be able to serialize and deserialize new Message subclasses the following criteria needs to be met. *

+ *

* + * BitcoinSerializers can be given a map which will be locked during reading/deserialization. This is used to + * avoid deserializing identical messages more than once, which is helpful in memory-constrained environments like + * smartphones. */ public class BitcoinSerializer { private static final Logger log = LoggerFactory.getLogger(BitcoinSerializer.class); @@ -63,15 +67,38 @@ public class BitcoinSerializer { names.put(GetAddrMessage.class, "getaddr"); } + /** + * A doubly-linked map of message-hash to counts. When a new message is received we increment the count in + * this list. The count isn't currently used, but will be helpful later to know how many peers relayed a + * particular transaction. We can use that as a heuristic to estimate validity. + */ + private LinkedHashMap dedupeList; + + /* + * Returns a {@link LinkedHashMap} that evicts old entries, making it suitable for passing to the constructor + * if you wish to use message deduplication. + */ + public static LinkedHashMap createDedupeList() { + return new LinkedHashMap() { + @Override + protected boolean removeEldestEntry(Map.Entry entry) { + // Keep 100 message hashcodes in the list. This choice is fairly arbitrary. + return size() > 100; + } + }; + } + /** * Constructs a BitcoinSerializer with the given behavior. 
* * @param params networkParams used to create Messages instances and termining packetMagic * @param usesChecksumming set to true if checkums should be included and expected in headers */ - public BitcoinSerializer(NetworkParameters params, boolean usesChecksumming) { + public BitcoinSerializer(NetworkParameters params, boolean usesChecksumming, + LinkedHashMap dedupeList) { this.params = params; this.usesChecksumming = usesChecksumming; + this.dedupeList = dedupeList; } public void setUseChecksumming(boolean usesChecksumming) { @@ -125,7 +152,10 @@ public class BitcoinSerializer { log.debug("Sending {} message: {}", name, bytesToHexString(header) + bytesToHexString(payload)); } - /** Reads a message from the given InputStream and returns it. */ + /** + * Reads a message from the given InputStream and returns it. If deduping is enabled and the message has already + * been parsed/returned, it will return null. + */ public Message deserialize(InputStream in) throws ProtocolException, IOException { // A BitCoin protocol message has the following format. // @@ -145,7 +175,13 @@ public class BitcoinSerializer { BitcoinPacketHeader header = new BitcoinPacketHeader(usesChecksumming, in); // Now try to read the whole message. return deserializePayload(header, in); + } + private boolean canDedupeMessageType(String command) { + // We don't attempt to deduplicate messages that may be legitimately duplicated like ping or versions nor do + // we dedupe addr messages which are always different even if they contain redundant data. Trying to dedupe + // them would just fill up the shared hashmap. + return command.equals("block") || command.equals("tx"); } /** @@ -157,12 +193,9 @@ public class BitcoinSerializer { } /** - * Deserialize payload only. You must provide a header typically obtained by calling deserializeHeader. - * @param header - * @param in - * @return - * @throws ProtocolException - * @throws IOException + * Deserialize payload only. 
You must provide a header, typically obtained by calling + * {@link BitcoinSerializer#deserializeHeader}. If the deduping feature is active, may return NULL if the + * message was seen before. */ public Message deserializePayload(BitcoinPacketHeader header, InputStream in) throws ProtocolException, IOException { int readCursor = 0; @@ -175,6 +208,28 @@ public class BitcoinSerializer { readCursor += bytesRead; } + // Check for duplicates. This is to avoid the cost (cpu and memory) of parsing the message twice, which can + // be an issue on constrained devices. + if (dedupeList != null && canDedupeMessageType(header.command)) { + // We use a secure hash here rather than the faster and simpler array hashes because otherwise a malicious + // node on the network could broadcast a message designed to mask a different message. They would not + // necessarily have to be connected directly to this program. + synchronized (dedupeList) { + // Calculate hash inside the lock to avoid unnecessary battery power spent on hashing messages arriving + // on different threads simultaneously. + Sha256Hash hash = Sha256Hash.create(payloadBytes); + Integer count = dedupeList.get(hash); + if (count != null) { + int newCount = count + 1; + log.info("Received duplicate {} message, now seen {} times", header.command, newCount); + dedupeList.put(hash, newCount); + return null; + } else { + dedupeList.put(hash, 1); + } + } + } + // Verify the checksum. 
if (usesChecksumming) { byte[] hash = doubleDigest(payloadBytes); diff --git a/src/com/google/bitcoin/core/NetworkConnection.java b/src/com/google/bitcoin/core/NetworkConnection.java index 25d56b75..34c15dc3 100644 --- a/src/com/google/bitcoin/core/NetworkConnection.java +++ b/src/com/google/bitcoin/core/NetworkConnection.java @@ -26,12 +26,19 @@ import java.net.InetAddress; import java.net.InetSocketAddress; import java.net.Socket; import java.util.Date; +import java.util.LinkedHashMap; +import java.util.LinkedList; /** * A NetworkConnection handles talking to a remote BitCoin peer at a low level. It understands how to read and write * messages off the network, but doesn't asynchronously communicate with the peer or handle the higher level details * of the protocol. After constructing a NetworkConnection, use a {@link Peer} to hand off communication to a - * background thread. + * background thread.

+ * + * Multiple NetworkConnections will, by default, share one de-duplication list. A connection waits while another + * NetworkConnection instance is checking a received block or tx payload against that list, and discards duplicates before parsing them. This is intended to avoid memory usage spikes in constrained + * environments like Android where deserializing a large message (like a block) on multiple threads simultaneously is + * both wasteful and can cause OOM failures.

* * Construction is blocking whilst the protocol version is negotiated. */ @@ -45,8 +52,9 @@ public class NetworkConnection { private final InetAddress remoteIp; private final NetworkParameters params; private final VersionMessage versionMessage; - private static final boolean PROTOCOL_LOG = false; + // Given to the BitcoinSerializer to de-duplicate messages. + private static final LinkedHashMap dedupeList = BitcoinSerializer.createDedupeList(); private BitcoinSerializer serializer = null; /** @@ -58,10 +66,12 @@ public class NetworkConnection { * @param params Defines which network to connect to and details of the protocol. * @param bestHeight How many blocks are in our best chain * @param connectTimeout Timeout in milliseconds when initially connecting to peer + * @param dedupe Whether to avoid parsing duplicate messages from the network (ie from other peers). * @throws IOException if there is a network related failure. * @throws ProtocolException if the version negotiation failed. */ - public NetworkConnection(PeerAddress peerAddress, NetworkParameters params, int bestHeight, int connectTimeout) + public NetworkConnection(PeerAddress peerAddress, NetworkParameters params, + int bestHeight, int connectTimeout, boolean dedupe) throws IOException, ProtocolException { this.params = params; this.remoteIp = peerAddress.addr; @@ -75,8 +85,8 @@ public class NetworkConnection { out = socket.getOutputStream(); in = socket.getInputStream(); - // the version message never uses checksumming. Update checkumming property after version is read. - this.serializer = new BitcoinSerializer(params, false); + // The version message never uses checksumming. Update checkumming property after version is read. + this.serializer = new BitcoinSerializer(params, false, dedupe ? dedupeList : null); // Announce ourselves. This has to come first to connect to clients beyond v0.30.20.2 which wait to hear // from us until they send their version message back. 
@@ -116,7 +126,7 @@ public class NetworkConnection { public NetworkConnection(InetAddress inetAddress, NetworkParameters params, int bestHeight, int connectTimeout) throws IOException, ProtocolException { - this(new PeerAddress(inetAddress), params, bestHeight, connectTimeout); + this(new PeerAddress(inetAddress), params, bestHeight, connectTimeout, true); } /** @@ -150,7 +160,13 @@ public class NetworkConnection { * @throws ProtocolException if the message is badly formatted, failed checksum or there was a TCP failure. */ public Message readMessage() throws IOException, ProtocolException { - return serializer.deserialize(in); + Message message; + do { + message = serializer.deserialize(in); + // If message is null, it means deduping was enabled, we read a duplicated message and skipped parsing to + // avoid doing redundant work. So go around and wait for another message. + } while (message == null); + return message; } /** diff --git a/src/com/google/bitcoin/core/Peer.java b/src/com/google/bitcoin/core/Peer.java index 52b64647..39d44cdd 100644 --- a/src/com/google/bitcoin/core/Peer.java +++ b/src/com/google/bitcoin/core/Peer.java @@ -54,6 +54,12 @@ public class Peer { private List eventListeners; + /** + * If true, we do some things that may only make sense on constrained devices like Android phones. Currently this + * only controls message deduplication. + */ + public static boolean MOBILE_OPTIMIZED = true; + /** * Construct a peer that handles the given network connection and reads/writes from the given block chain. Note that * communication won't occur until you call connect(). 
@@ -97,7 +103,7 @@ public class Peer { */ public synchronized void connect() throws PeerException { try { - conn = new NetworkConnection(address, params, bestHeight, 60000); + conn = new NetworkConnection(address, params, bestHeight, 60000, MOBILE_OPTIMIZED); } catch (IOException ex) { throw new PeerException(ex); } catch (ProtocolException ex) { diff --git a/src/com/google/bitcoin/core/Sha256Hash.java b/src/com/google/bitcoin/core/Sha256Hash.java index c9e2252a..2a2eded5 100644 --- a/src/com/google/bitcoin/core/Sha256Hash.java +++ b/src/com/google/bitcoin/core/Sha256Hash.java @@ -20,6 +20,8 @@ import org.bouncycastle.util.encoders.Hex; import java.io.Serializable; import java.math.BigInteger; +import java.security.MessageDigest; +import java.security.NoSuchAlgorithmException; import java.util.Arrays; /** @@ -45,6 +47,16 @@ public class Sha256Hash implements Serializable { this.bytes = Hex.decode(string); } + /** Calculates the (one-time) hash of contents and returns it as a new wrapped hash. */ + public static Sha256Hash create(byte[] contents) { + try { + MessageDigest digest = MessageDigest.getInstance("SHA-256"); + return new Sha256Hash(digest.digest(contents)); + } catch (NoSuchAlgorithmException e) { + throw new RuntimeException(e); // Cannot happen. + } + } + /** Returns true if the hashes are equal. 
*/ @Override public boolean equals(Object other) { diff --git a/tests/com/google/bitcoin/core/BitcoinSerializerTest.java b/tests/com/google/bitcoin/core/BitcoinSerializerTest.java index 089327c6..92accb0f 100644 --- a/tests/com/google/bitcoin/core/BitcoinSerializerTest.java +++ b/tests/com/google/bitcoin/core/BitcoinSerializerTest.java @@ -21,14 +21,39 @@ import org.bouncycastle.util.encoders.Hex; import org.junit.Test; import java.io.ByteArrayInputStream; +import java.util.LinkedHashMap; import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertNull; + +public class BitcoinSerializerTest { + private final byte[] addrMessage = Hex.decode("f9beb4d96164647200000000000000001f000000" + + "ed52399b01e215104d010000000000000000000000000000000000ffff0a000001208d"); + + private final byte[] txMessage = Hex.decode( + "F9 BE B4 D9 74 78 00 00 00 00 00 00 00 00 00 00" + + "02 01 00 00 E2 93 CD BE 01 00 00 00 01 6D BD DB" + + "08 5B 1D 8A F7 51 84 F0 BC 01 FA D5 8D 12 66 E9" + + "B6 3B 50 88 19 90 E4 B4 0D 6A EE 36 29 00 00 00" + + "00 8B 48 30 45 02 21 00 F3 58 1E 19 72 AE 8A C7" + + "C7 36 7A 7A 25 3B C1 13 52 23 AD B9 A4 68 BB 3A" + + "59 23 3F 45 BC 57 83 80 02 20 59 AF 01 CA 17 D0" + + "0E 41 83 7A 1D 58 E9 7A A3 1B AE 58 4E DE C2 8D" + + "35 BD 96 92 36 90 91 3B AE 9A 01 41 04 9C 02 BF" + + "C9 7E F2 36 CE 6D 8F E5 D9 40 13 C7 21 E9 15 98" + + "2A CD 2B 12 B6 5D 9B 7D 59 E2 0A 84 20 05 F8 FC" + + "4E 02 53 2E 87 3D 37 B9 6F 09 D6 D4 51 1A DA 8F" + + "14 04 2F 46 61 4A 4C 70 C0 F1 4B EF F5 FF FF FF" + + "FF 02 40 4B 4C 00 00 00 00 00 19 76 A9 14 1A A0" + + "CD 1C BE A6 E7 45 8A 7A BA D5 12 A9 D9 EA 1A FB" + + "22 5E 88 AC 80 FA E9 C7 00 00 00 00 19 76 A9 14" + + "0E AB 5B EA 43 6A 04 84 CF AB 12 48 5E FD A0 B7" + + "8B 4E CC 52 88 AC 00 00 00 00"); -public class BitcoinSerializerTest -{ @Test public void testVersion() throws Exception { - BitcoinSerializer bs = new 
BitcoinSerializer(NetworkParameters.prodNet(), false); + BitcoinSerializer bs = new BitcoinSerializer(NetworkParameters.prodNet(), false, null); // the actual data from https://en.bitcoin.it/wiki/Protocol_specification#version ByteArrayInputStream bais = new ByteArrayInputStream(Hex.decode("f9beb4d976657273696f6e0000000000550000009" + "c7c00000100000000000000e615104d00000000010000000000000000000000000000000000ffff0a000001daf6010000" + @@ -42,7 +67,7 @@ public class BitcoinSerializerTest @Test public void testVerack() throws Exception { - BitcoinSerializer bs = new BitcoinSerializer(NetworkParameters.prodNet(), false); + BitcoinSerializer bs = new BitcoinSerializer(NetworkParameters.prodNet(), false, null); // the actual data from https://en.bitcoin.it/wiki/Protocol_specification#verack ByteArrayInputStream bais = new ByteArrayInputStream(Hex.decode("f9beb4d976657261636b00000000000000000000")); VersionAck va = (VersionAck)bs.deserialize(bais); @@ -51,14 +76,25 @@ public class BitcoinSerializerTest @Test public void testAddr() throws Exception { - BitcoinSerializer bs = new BitcoinSerializer(NetworkParameters.prodNet(), true); + BitcoinSerializer bs = new BitcoinSerializer(NetworkParameters.prodNet(), true, null); // the actual data from https://en.bitcoin.it/wiki/Protocol_specification#addr - ByteArrayInputStream bais = new ByteArrayInputStream(Hex.decode("f9beb4d96164647200000000000000001f000000" + - "ed52399b01e215104d010000000000000000000000000000000000ffff0a000001208d")); + ByteArrayInputStream bais = new ByteArrayInputStream(addrMessage); AddressMessage a = (AddressMessage)bs.deserialize(bais); assertEquals(1, a.addresses.size()); PeerAddress pa = a.addresses.get(0); assertEquals(8333, pa.port); assertEquals("10.0.0.1", pa.addr.getHostAddress()); } + + @Test + public void testDeduplication() throws Exception { + LinkedHashMap dedupeList = BitcoinSerializer.createDedupeList(); + BitcoinSerializer bs = new BitcoinSerializer(NetworkParameters.prodNet(), true, 
dedupeList); + ByteArrayInputStream bais = new ByteArrayInputStream(txMessage); + Transaction tx = (Transaction)bs.deserialize(bais); + assertNotNull(tx); + bais.reset(); + tx = (Transaction)bs.deserialize(bais); + assertNull(tx); + } }