From 63036f359245f468cbb4979e1bef69cb9a8537f8 Mon Sep 17 00:00:00 2001 From: catbref Date: Fri, 26 Jul 2019 17:04:47 +0100 Subject: [PATCH] Increased NTP check interval from 5 minutes to 10 minutes. Controller batches SysTray updates into one per second. Block generation only allowed by Controller is clock known to be accurate. ('not sure' stops generation). NTP MAX_STDDEV increased from 25ms to 125ms to cater for poorly connected nodes. Controller sends peers list over outbound peer connections, requests peers list from inbound peer connections. When peer handshake completes, Network & Controller only send initial messages over outbound peer connections. This is to fix HEIGHT_V2 messages being processed out-of-order breaking handshaking, as indicated by log entries like: 2019-07-26 16:16:35 DEBUG Network:702 - Unexpected HEIGHT_V2 message from xx.xxx.xx.xx:pppp, expected PROOF 2019-07-26 16:16:35 DEBUG Network:840 - Handshake completed with peer xx.xxx.xx.xx:pppp Increased connection failure backoff from 1 minute to 5 minutes, as handshake timeout is 1 minute and then nodes would immediately reconnect. Changed default NTP servers from asia to cn. --- .../java/org/qora/controller/Controller.java | 111 ++++++++++-------- src/main/java/org/qora/network/Network.java | 41 ++++--- src/main/java/org/qora/network/Peer.java | 8 +- src/main/java/org/qora/settings/Settings.java | 10 +- src/main/java/org/qora/utils/NTP.java | 8 +- 5 files changed, 99 insertions(+), 79 deletions(-) diff --git a/src/main/java/org/qora/controller/Controller.java b/src/main/java/org/qora/controller/Controller.java index 39daccd6..ad36c6ee 100644 --- a/src/main/java/org/qora/controller/Controller.java +++ b/src/main/java/org/qora/controller/Controller.java @@ -91,12 +91,13 @@ public class Controller extends Thread { private static final String repositoryUrlTemplate = "jdbc:hsqldb:file:%s/blockchain;create=true;hsqldb.full_log_replay=true"; private static final long ARBITRARY_REQUEST_TIMEOUT = 5 * 1000; // ms private static final long REPOSITORY_BACKUP_PERIOD = 123 * 60 * 1000; // ms - private static final long NTP_CHECK_PERIOD = 5 * 60 * 1000; // ms + private static final long NTP_CHECK_PERIOD = 10 * 60 * 1000; // ms private static final long MAX_NTP_OFFSET = 500; // ms private static volatile boolean isStopping = false; private static BlockGenerator blockGenerator = null; private static volatile boolean requestSync = false; + private static volatile boolean requestSysTrayUpdate = false; private static Controller instance; private final String buildVersion; @@ -105,7 +106,7 @@ public class Controller extends Thread { private long repositoryBackupTimestamp = startTime + REPOSITORY_BACKUP_PERIOD; private long ntpCheckTimestamp = startTime; // ms /** Whether BlockGenerator is allowed to generate blocks. Mostly determined by system clock accuracy. */ - private boolean isGenerationAllowed = false; + private volatile boolean isGenerationAllowed = false; /** Signature of peer's latest block when we tried to sync but peer had inferior chain. */ private byte[] inferiorChainPeerBlockSignature = null; @@ -329,11 +330,8 @@ public class Controller extends Thread { // Potentially nag end-user about NTP if (System.currentTimeMillis() >= ntpCheckTimestamp) { ntpCheckTimestamp += NTP_CHECK_PERIOD; - Boolean isClockAccurate = ntpCheck(); - if (isClockAccurate != null) { - isGenerationAllowed = isClockAccurate; - updateSysTray(); - } + isGenerationAllowed = ntpCheck(); + requestSysTrayUpdate = true; } // Prune stuck/slow/old peers @@ -342,6 +340,12 @@ public class Controller extends Thread { } catch (DataException e) { LOGGER.warn(String.format("Repository issue when trying to prune peers: %s", e.getMessage())); } + + // Maybe update SysTray + if (requestSysTrayUpdate) { + requestSysTrayUpdate = false; + updateSysTray(); + } } } catch (InterruptedException e) { // Fall-through to exit @@ -418,7 +422,7 @@ public class Controller extends Thread { break; case OK: - updateSysTray(); + requestSysTrayUpdate = true; // fall-through... case NOTHING_TO_DO: LOGGER.debug(() -> String.format("Synchronized with peer %s (%s)", peer, syncResult.name())); @@ -435,14 +439,14 @@ public class Controller extends Thread { /** * Nag if we detect system clock is too far from internet time. * - * @return true if clock is accurate, false if inaccurate, null if we don't know. + * @return true if clock is accurate, false if inaccurate or we don't know. */ - private Boolean ntpCheck() { + private boolean ntpCheck() { // Fetch mean offset from internet time (ms). Long meanOffset = NTP.getOffset(); final boolean isWindows = System.getProperty("os.name").toLowerCase().contains("win"); - Boolean isNtpActive = null; + boolean isNtpActive = false; if (isWindows) { // Detecting Windows Time service @@ -472,24 +476,22 @@ public class Controller extends Thread { } } - // If offset is good and ntp is active then we're good - if (meanOffset != null && Math.abs(meanOffset) < MAX_NTP_OFFSET && isNtpActive == true) - return true; + LOGGER.info(String.format("NTP mean offset %s, NTP service active: %s", meanOffset, isNtpActive)); - // Time to nag - String caption = Translator.INSTANCE.translate("SysTray", "NTP_NAG_CAPTION"); - String text = Translator.INSTANCE.translate("SysTray", isWindows ? "NTP_NAG_TEXT_WINDOWS" : "NTP_NAG_TEXT_UNIX"); - SysTray.getInstance().showMessage(caption, text, MessageType.WARNING); + final boolean isOffsetGood = meanOffset != null && Math.abs(meanOffset) < MAX_NTP_OFFSET; - if (meanOffset == null) - // We don't know if we're inaccurate - return null; + // If offset bad or NTP not active then nag + if (!isOffsetGood || !isNtpActive) { + String caption = Translator.INSTANCE.translate("SysTray", "NTP_NAG_CAPTION"); + String text = Translator.INSTANCE.translate("SysTray", isWindows ? "NTP_NAG_TEXT_WINDOWS" : "NTP_NAG_TEXT_UNIX"); + SysTray.getInstance().showMessage(caption, text, MessageType.WARNING); + } // Return whether we're accurate (disregarding whether NTP service is active) - return Math.abs(meanOffset) < MAX_NTP_OFFSET; + return isOffsetGood; } - public void updateSysTray() { + private void updateSysTray() { final int numberOfPeers = Network.getInstance().getUniqueHandshakedPeers().size(); final int height = getChainHeight(); @@ -565,17 +567,14 @@ public class Controller extends Thread { public void doNetworkBroadcast() { Network network = Network.getInstance(); - // Send our known peers - network.broadcast(peer -> network.buildPeersMessage(peer)); + // Send (if outbound) / Request peer lists + network.broadcast(peer -> peer.isOutbound() ? network.buildPeersMessage(peer) : new GetPeersMessage()); // Send our current height BlockData latestBlockData = getChainTip(); network.broadcast(peer -> network.buildHeightMessage(peer, latestBlockData)); - // Request peers lists - network.broadcast(peer -> new GetPeersMessage()); - - // Request unconfirmed transaction signatures + // Send (if outbound) / Request unconfirmed transaction signatures network.broadcast(peer -> network.buildGetUnconfirmedTransactionsMessage(peer)); } @@ -594,39 +593,42 @@ public class Controller extends Thread { } public void onPeerHandshakeCompleted(Peer peer) { - if (peer.getVersion() < 2) { - // Legacy mode + // Only send if outbound + if (peer.isOutbound()) { + if (peer.getVersion() < 2) { + // Legacy mode - // Send our unconfirmed transactions - try (final Repository repository = RepositoryManager.getRepository()) { - List transactions = repository.getTransactionRepository().getUnconfirmedTransactions(); + // Send our unconfirmed transactions + try (final Repository repository = RepositoryManager.getRepository()) { + List transactions = repository.getTransactionRepository().getUnconfirmedTransactions(); - for (TransactionData transactionData : transactions) { - Message transactionMessage = new TransactionMessage(transactionData); - if (!peer.sendMessage(transactionMessage)) { - peer.disconnect("failed to send unconfirmed transaction"); - return; + for (TransactionData transactionData : transactions) { + Message transactionMessage = new TransactionMessage(transactionData); + if (!peer.sendMessage(transactionMessage)) { + peer.disconnect("failed to send unconfirmed transaction"); + return; + } } + } catch (DataException e) { + LOGGER.error("Repository issue while sending unconfirmed transactions", e); } - } catch (DataException e) { - LOGGER.error("Repository issue while sending unconfirmed transactions", e); - } - } else { - // V2 protocol + } else { + // V2 protocol - // Request peer's unconfirmed transactions - Message message = new GetUnconfirmedTransactionsMessage(); - if (!peer.sendMessage(message)) { - peer.disconnect("failed to send request for unconfirmed transactions"); - return; + // Request peer's unconfirmed transactions + Message message = new GetUnconfirmedTransactionsMessage(); + if (!peer.sendMessage(message)) { + peer.disconnect("failed to send request for unconfirmed transactions"); + return; + } } } - updateSysTray(); + requestSysTrayUpdate = true; } public void onPeerDisconnect(Peer peer) { - updateSysTray(); + requestSysTrayUpdate = true; } public void onNetworkMessage(Peer peer, Message message) { @@ -655,10 +657,17 @@ public class Controller extends Thread { case HEIGHT_V2: { HeightV2Message heightV2Message = (HeightV2Message) message; + // If peer is inbound and we've not updated their height + // then this is probably their initial HEIGHT_V2 message + // so they need a corresponding HEIGHT_V2 message from us + if (!peer.isOutbound() && peer.getLastHeight() == null) + peer.sendMessage(Network.getInstance().buildHeightMessage(peer, getChainTip())); + // Update all peers with same ID List connectedPeers = Network.getInstance().getHandshakedPeers(); for (Peer connectedPeer : connectedPeers) { + // Skip connectedPeer if they have no ID or their ID doesn't match sender's ID if (connectedPeer.getPeerId() == null || !Arrays.equals(connectedPeer.getPeerId(), peer.getPeerId())) continue; diff --git a/src/main/java/org/qora/network/Network.java b/src/main/java/org/qora/network/Network.java index 70bb2c55..f5d9da25 100644 --- a/src/main/java/org/qora/network/Network.java +++ b/src/main/java/org/qora/network/Network.java @@ -63,7 +63,7 @@ public class Network extends Thread { private static final int LISTEN_BACKLOG = 10; /** How long before retrying after a connection failure, in milliseconds. */ - private static final int CONNECT_FAILURE_BACKOFF = 60 * 1000; // ms + private static final int CONNECT_FAILURE_BACKOFF = 5 * 60 * 1000; // ms /** How long between informational broadcasts to all connected peers, in milliseconds. */ private static final int BROADCAST_INTERVAL = 60 * 1000; // ms /** Maximum time since last successful connection for peer info to be propagated, in milliseconds. */ @@ -508,7 +508,7 @@ public class Network extends Thread { handshakePeers.removeIf(peer -> peer.getHandshakeStatus() == Handshake.COMPLETED || peer.getConnectionTimestamp() == null || peer.getConnectionTimestamp() > now - HANDSHAKE_TIMEOUT); for (Peer peer : handshakePeers) - peer.disconnect("handshake timeout"); + peer.disconnect(String.format("handshake timeout at %s", peer.getHandshakeStatus().name())); // Prune 'old' peers from repository... try (final Repository repository = RepositoryManager.getRepository()) { @@ -848,24 +848,29 @@ public class Network extends Thread { // Start regular pings peer.startPings(); - // Send our height - Message heightMessage = buildHeightMessage(peer, Controller.getInstance().getChainTip()); - if (!peer.sendMessage(heightMessage)) { - peer.disconnect("failed to send height/info"); - return; + // Only the outbound side needs to send anything (after we've received handshake-completing response). + // (If inbound sent anything here, it's possible it could be processed out-of-order with handshake message). + + if (peer.isOutbound()) { + // Send our height + Message heightMessage = buildHeightMessage(peer, Controller.getInstance().getChainTip()); + if (!peer.sendMessage(heightMessage)) { + peer.disconnect("failed to send height/info"); + return; + } + + // Send our peers list + Message peersMessage = this.buildPeersMessage(peer); + if (!peer.sendMessage(peersMessage)) + peer.disconnect("failed to send peers list"); + + // Request their peers list + Message getPeersMessage = new GetPeersMessage(); + if (!peer.sendMessage(getPeersMessage)) + peer.disconnect("failed to request peers list"); } - // Send our peers list - Message peersMessage = this.buildPeersMessage(peer); - if (!peer.sendMessage(peersMessage)) - peer.disconnect("failed to send peers list"); - - // Request their peers list - Message getPeersMessage = new GetPeersMessage(); - if (!peer.sendMessage(getPeersMessage)) - peer.disconnect("failed to request peers list"); - - // Ask Controller if they want to send anything + // Ask Controller if they want to do anything Controller.getInstance().onPeerHandshakeCompleted(peer); } diff --git a/src/main/java/org/qora/network/Peer.java b/src/main/java/org/qora/network/Peer.java index ee828ac4..068cdcb6 100644 --- a/src/main/java/org/qora/network/Peer.java +++ b/src/main/java/org/qora/network/Peer.java @@ -483,7 +483,8 @@ public class Peer { } public void disconnect(String reason) { - LOGGER.debug(String.format("Disconnecting peer %s: %s", this, reason)); + if (!isStopping) + LOGGER.debug(() -> String.format("Disconnecting peer %s: %s", this, reason)); this.shutdown(); @@ -491,7 +492,10 @@ public class Peer { } public void shutdown() { - LOGGER.debug(() -> String.format("Shutting down peer %s", this)); + if (!isStopping) + LOGGER.debug(() -> String.format("Shutting down peer %s", this)); + + isStopping = true; if (this.socketChannel.isOpen()) { try { diff --git a/src/main/java/org/qora/settings/Settings.java b/src/main/java/org/qora/settings/Settings.java index 741416ff..14bea809 100644 --- a/src/main/java/org/qora/settings/Settings.java +++ b/src/main/java/org/qora/settings/Settings.java @@ -102,11 +102,11 @@ public class Settings { "1.pool.ntp.org", "2.pool.ntp.org", "3.pool.ntp.org", - "asia.pool.ntp.org", - "0.asia.pool.ntp.org", - "1.asia.pool.ntp.org", - "2.asia.pool.ntp.org", - "3.asia.pool.ntp.org" + "cn.pool.ntp.org", + "0.cn.pool.ntp.org", + "1.cn.pool.ntp.org", + "2.cn.pool.ntp.org", + "3.cn.pool.ntp.org" }; // Constructors diff --git a/src/main/java/org/qora/utils/NTP.java b/src/main/java/org/qora/utils/NTP.java index 414f2fc8..16529393 100644 --- a/src/main/java/org/qora/utils/NTP.java +++ b/src/main/java/org/qora/utils/NTP.java @@ -14,7 +14,7 @@ import org.qora.settings.Settings; public class NTP { private static final Logger LOGGER = LogManager.getLogger(NTP.class); - private static final double MAX_STDDEV = 25; // ms + private static final double MAX_STDDEV = 125; // ms /** * Returns aggregated internet time. @@ -71,7 +71,7 @@ public class NTP { } if (offsets.size() < ntpServers.length / 2) { - LOGGER.debug("Not enough replies"); + LOGGER.info(String.format("Not enough replies: %d, minimum is %d", offsets.size(), ntpServers.length / 2)); return null; } @@ -90,8 +90,10 @@ public class NTP { double stddev = Math.sqrt(((s0 * s2) - (s1 * s1)) / (s0 * (s0 - 1))); // If stddev is excessive then we're not very sure so give up - if (stddev > MAX_STDDEV) + if (stddev > MAX_STDDEV) { + LOGGER.info(String.format("Excessive standard deviation %.1f, maximum is %.1f", stddev, MAX_STDDEV)); return null; + } return (long) mean; }