diff --git a/core/pom.xml b/core/pom.xml
index f552d799..6b80ce02 100644
--- a/core/pom.xml
+++ b/core/pom.xml
@@ -280,6 +280,11 @@
9.1-901.jdbc4
-->
+        <dependency>
+            <groupId>com.subgraph</groupId>
+            <artifactId>orchid</artifactId>
+            <version>1.0-SNAPSHOT</version>
+        </dependency>
diff --git a/core/src/main/java/com/google/bitcoin/net/BlockingClient.java b/core/src/main/java/com/google/bitcoin/net/BlockingClient.java
index ee4c90ae..aad4b831 100644
--- a/core/src/main/java/com/google/bitcoin/net/BlockingClient.java
+++ b/core/src/main/java/com/google/bitcoin/net/BlockingClient.java
@@ -23,6 +23,7 @@ import javax.net.SocketFactory;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
+import java.net.InetSocketAddress;
import java.net.Socket;
import java.net.SocketAddress;
import java.nio.ByteBuffer;
@@ -65,14 +66,15 @@ public class BlockingClient implements MessageWriteTarget {
// sure it doesnt get too large or have to call read too often.
dbuf = ByteBuffer.allocateDirect(Math.min(Math.max(parser.getMaxMessageSize(), BUFFER_SIZE_LOWER_BOUND), BUFFER_SIZE_UPPER_BOUND));
parser.setWriteTarget(this);
- socket = socketFactory.createSocket();
Thread t = new Thread() {
@Override
public void run() {
if (clientSet != null)
clientSet.add(BlockingClient.this);
try {
- socket.connect(serverAddress, connectTimeoutMillis);
+ InetSocketAddress iServerAddress = (InetSocketAddress)serverAddress;
+ socket = socketFactory.createSocket(iServerAddress.getAddress(), iServerAddress.getPort());
+ //socket.connect(serverAddress, connectTimeoutMillis);
parser.connectionOpened();
InputStream stream = socket.getInputStream();
byte[] readBuff = new byte[dbuf.capacity()];
diff --git a/core/src/main/java/com/google/bitcoin/net/discovery/TorDiscovery.java b/core/src/main/java/com/google/bitcoin/net/discovery/TorDiscovery.java
new file mode 100644
index 00000000..23b40388
--- /dev/null
+++ b/core/src/main/java/com/google/bitcoin/net/discovery/TorDiscovery.java
@@ -0,0 +1,270 @@
+/**
+ * Copyright 2014 Miron Cuperman
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.google.bitcoin.net.discovery;
+
+import com.google.bitcoin.core.NetworkParameters;
+import com.google.common.collect.Lists;
+import com.google.common.collect.Maps;
+import com.google.common.collect.Sets;
+import com.google.common.util.concurrent.Futures;
+import com.google.common.util.concurrent.ListenableFuture;
+import com.google.common.util.concurrent.ListeningExecutorService;
+import com.google.common.util.concurrent.MoreExecutors;
+import com.subgraph.orchid.Circuit;
+import com.subgraph.orchid.RelayCell;
+import com.subgraph.orchid.Router;
+import com.subgraph.orchid.TorClient;
+import com.subgraph.orchid.circuits.path.CircuitPathChooser;
+import com.subgraph.orchid.data.HexDigest;
+import com.subgraph.orchid.data.exitpolicy.ExitTarget;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.net.InetAddress;
+import java.net.InetSocketAddress;
+import java.net.UnknownHostException;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.concurrent.Callable;
+import java.util.concurrent.ExecutionException;
+import java.util.concurrent.Executors;
+import java.util.concurrent.TimeUnit;
+
+/**
+ * <p>Supports peer discovery through Tor.</p>
+ *
+ * <p>Failure to obtain at least four different peers through different exit nodes will cause
+ * a PeerDiscoveryException to be thrown during getPeers().</p>
+ *
+ * <p>DNS seeds do not attempt to enumerate every peer on the network. If you want more peers
+ * to connect to, you need to discover them via other means (like addr broadcasts).</p>
+ */
+public class TorDiscovery implements PeerDiscovery {
+    private static final Logger log = LoggerFactory.getLogger(TorDiscovery.class);
+
+    /** Minimum number of addresses (one per exit node) that getPeers() must collect to succeed. */
+    public static final int MINIMUM_ROUTER_COUNT = 4;
+    /** Number of distinct exit nodes getPeers() gathers before it starts opening circuits. */
+    public static final int MINIMUM_ROUTER_LOOKUP_COUNT = 10;
+    /** Number of receive attempts lookup() makes on a circuit while waiting for a reply cell. */
+    public static final int RECEIVE_RETRIES = 3;
+    public static final int RESOLVE_STREAM_ID = 0x1000; // An arbitrary stream ID
+    // Answer type codes that lookup() compares against the type byte of each reply entry.
+    public static final int RESOLVE_CNAME = 0x00;
+    public static final int RESOLVE_ERROR = 0xf0;
+    public static final int RESOLVE_IPV4 = 0x04;
+    public static final int RESOLVE_IPV6 = 0x06;
+
+    private final String[] hostNames;
+    private final NetworkParameters netParams;
+    private final CircuitPathChooser pathChooser;
+    private final TorClient torClient;
+    // Created per phase by createThreadPool() and cleared again by shutdownThreadPool().
+    private ListeningExecutorService threadPool;
+
+    /**
+     * Supports finding peers through Tor. Community run DNS entry points will be used.
+     *
+     * @param netParams Network parameters to be used for port information.
+     * @param torClient an already-started Tor client.
+     */
+    public TorDiscovery(NetworkParameters netParams, TorClient torClient) {
+        this(netParams.getDnsSeeds(), netParams, torClient);
+    }
+
+    /**
+     * Supports finding peers through Tor.
+     *
+     * @param hostNames Host names to be examined for seed addresses.
+     * @param netParams Network parameters to be used for port information.
+     * @param torClient an already-started Tor client.
+     */
+    public TorDiscovery(String[] hostNames, NetworkParameters netParams, TorClient torClient) {
+        this.hostNames = hostNames;
+        this.netParams = netParams;
+
+        this.torClient = torClient;
+        this.pathChooser = CircuitPathChooser.create(torClient.getConfig(), torClient.getDirectory());
+    }
+
+    /** Pairs a resolved address with the exit router whose circuit produced it. */
+    private static class Lookup {
+        final Router router;
+        final InetAddress address;
+
+        Lookup(Router router, InetAddress address) {
+            this.router = router;
+            this.address = address;
+        }
+    }
+
+    /**
+     * Resolves the configured host names through several Tor exit nodes and returns the
+     * resulting peer addresses (at most one per exit node) in shuffled order.
+     *
+     * The timeout is applied once to circuit creation and once more to the lookups, so the
+     * call may block for up to twice the given duration.
+     *
+     * @param timeoutValue how long each of the two phases may take.
+     * @param timeoutUnit unit of timeoutValue.
+     * @return the discovered peer addresses, using the port from the network parameters.
+     * @throws PeerDiscoveryException if no host names are configured, if fewer than
+     *         MINIMUM_ROUTER_COUNT addresses were obtained, or if the thread was interrupted.
+     */
+    public InetSocketAddress[] getPeers(long timeoutValue, TimeUnit timeoutUnit) throws PeerDiscoveryException {
+        if (hostNames == null)
+            throw new PeerDiscoveryException("Unable to find any peers via DNS");
+
+        Set<Router> routers = Sets.newHashSet();
+        ArrayList<ExitTarget> dummyTargets = Lists.newArrayList();
+
+        // Collect exit nodes until we have enough
+        // NOTE(review): this loop only ends once MINIMUM_ROUTER_LOOKUP_COUNT distinct routers
+        // have been returned - confirm the path chooser can always supply that many.
+        while (routers.size() < MINIMUM_ROUTER_LOOKUP_COUNT) {
+            Router router = pathChooser.chooseExitNodeForTargets(dummyTargets);
+            routers.add(router);
+        }
+
+        try {
+            List<Circuit> circuits = getCircuits(timeoutValue, timeoutUnit, routers);
+
+            Collection<InetSocketAddress> addresses = lookupAddresses(timeoutValue, timeoutUnit, circuits);
+
+            if (addresses.size() < MINIMUM_ROUTER_COUNT)
+                throw new PeerDiscoveryException("Unable to find enough peers via Tor - got " + addresses.size());
+            ArrayList<InetSocketAddress> addressList = Lists.newArrayList();
+            addressList.addAll(addresses);
+            Collections.shuffle(addressList);
+            return addressList.toArray(new InetSocketAddress[addressList.size()]);
+        } catch (InterruptedException e) {
+            throw new PeerDiscoveryException(e);
+        }
+    }
+
+    /**
+     * Opens one internal circuit per router in parallel and returns those that were
+     * established before the timeout. Failed or timed-out attempts are dropped.
+     */
+    private List<Circuit> getCircuits(long timeoutValue, TimeUnit timeoutUnit, Set<Router> routers) throws InterruptedException {
+        createThreadPool(routers.size());
+
+        try {
+            List<ListenableFuture<Circuit>> circuitFutures = Lists.newArrayList();
+            for (final Router router : routers) {
+                circuitFutures.add(threadPool.submit(new Callable<Circuit>() {
+                    public Circuit call() throws Exception {
+                        return torClient.getCircuitManager().openInternalCircuitTo(Lists.newArrayList(router));
+                    }
+                }));
+            }
+
+            // awaitTermination() only returns early after a shutdown request; without this the
+            // call would always block for the whole timeout even if every circuit is ready.
+            threadPool.shutdown();
+            threadPool.awaitTermination(timeoutValue, timeoutUnit);
+            for (ListenableFuture<Circuit> future : circuitFutures) {
+                if (!future.isDone()) {
+                    log.warn("circuit timed out");
+                    future.cancel(true);
+                }
+            }
+
+            List<Circuit> circuits;
+            try {
+                circuits = Futures.successfulAsList(circuitFutures).get();
+                // Any failures will result in null entries. Remove them.
+                circuits.removeAll(Collections.singleton(null));
+                return circuits;
+            } catch (ExecutionException e) {
+                // Cannot happen, successfulAsList accepts failures
+                throw new RuntimeException(e);
+            }
+        } finally {
+            shutdownThreadPool();
+        }
+    }
+
+    /**
+     * Resolves every host name over every circuit in parallel and returns the addresses found,
+     * keeping at most one address per exit router. Returns an empty collection when there is
+     * nothing to look up.
+     */
+    private Collection<InetSocketAddress> lookupAddresses(long timeoutValue, TimeUnit timeoutUnit, List<Circuit> circuits) throws InterruptedException {
+        // A fixed thread pool cannot be created with zero threads; with no work to do, report
+        // "no addresses" so the caller raises its usual PeerDiscoveryException.
+        if (circuits.isEmpty() || hostNames.length == 0)
+            return Collections.emptyList();
+
+        createThreadPool(circuits.size() * hostNames.length);
+
+        try {
+            List<ListenableFuture<Lookup>> lookupFutures = Lists.newArrayList();
+            for (final Circuit circuit : circuits) {
+                for (final String seed : hostNames) {
+                    lookupFutures.add(threadPool.submit(new Callable<Lookup>() {
+                        public Lookup call() throws Exception {
+                            return new Lookup(circuit.getFinalCircuitNode().getRouter(), lookup(circuit, seed));
+                        }
+                    }));
+                }
+            }
+
+            // Request shutdown first so awaitTermination() returns as soon as all lookups finish.
+            threadPool.shutdown();
+            threadPool.awaitTermination(timeoutValue, timeoutUnit);
+            for (ListenableFuture<Lookup> future : lookupFutures) {
+                if (!future.isDone()) {
+                    log.warn("lookup timed out");
+                    future.cancel(true);
+                }
+            }
+
+            try {
+                List<Lookup> lookups = Futures.successfulAsList(lookupFutures).get();
+                // Any failures will result in null entries. Remove them.
+                lookups.removeAll(Collections.singleton(null));
+
+                // Use a map to enforce one result per exit node
+                // TODO: randomize result selection better
+                Map<HexDigest, InetSocketAddress> lookupMap = Maps.newHashMap();
+
+                for (Lookup lookup : lookups) {
+                    InetSocketAddress address = new InetSocketAddress(lookup.address, netParams.getPort());
+                    lookupMap.put(lookup.router.getIdentityHash(), address);
+                }
+
+                return lookupMap.values();
+            } catch (ExecutionException e) {
+                // Cannot happen, successfulAsList accepts failures
+                throw new RuntimeException(e);
+            }
+        } finally {
+            shutdownThreadPool();
+        }
+    }
+
+    /** Stops the current thread pool, if any, interrupting tasks that are still running. */
+    private synchronized void shutdownThreadPool() {
+        // May already have been cleared by shutdown(); avoid a NullPointerException in that case.
+        if (threadPool != null) {
+            threadPool.shutdownNow();
+            threadPool = null;
+        }
+    }
+
+    /** Replaces the thread pool with a new fixed-size pool of the given size. */
+    private synchronized void createThreadPool(int size) {
+        threadPool =
+                MoreExecutors.listeningDecorator(Executors.newFixedThreadPool(size));
+    }
+
+    /**
+     * Asks the exit node at the end of the circuit to resolve the given host name and returns
+     * the first IPv4 or IPv6 answer.
+     *
+     * @throws UnknownHostException if the returned address bytes do not form a valid IP address.
+     * @throws RuntimeException if the reply is a CNAME or an error, or if no usable answer arrives.
+     */
+    private InetAddress lookup(Circuit circuit, String seed) throws UnknownHostException {
+        // Send a resolve cell to the exit node
+        RelayCell cell = circuit.createRelayCell(RelayCell.RELAY_RESOLVE, RESOLVE_STREAM_ID, circuit.getFinalCircuitNode());
+        cell.putString(seed);
+        circuit.sendRelayCell(cell);
+
+        // Wait a few cell timeout periods (3 * 20 sec) for replies, in case the path is slow
+        for (int i = 0; i < RECEIVE_RETRIES; i++) {
+            RelayCell res = circuit.receiveRelayCell();
+            if (res != null) {
+                // Each answer is read as: type byte, length byte, value bytes, 4-byte TTL.
+                while (res.cellBytesRemaining() > 0) {
+                    int type = res.getByte();
+                    int len = res.getByte();
+                    byte[] value = new byte[len];
+                    res.getByteArray(value);
+                    res.getInt(); // TTL: read and discarded
+
+                    if (type == RESOLVE_CNAME || type >= RESOLVE_ERROR) {
+                        // TODO handle .onion CNAME replies
+                        throw new RuntimeException(new String(value));
+                    } else if (type == RESOLVE_IPV4 || type == RESOLVE_IPV6) {
+                        return InetAddress.getByAddress(value);
+                    }
+                }
+                // A reply arrived but held no usable answer; stop retrying.
+                break;
+            }
+        }
+        throw new RuntimeException("Could not look up " + seed);
+    }
+
+    /** Stops any lookup work that is still in progress. Does nothing if no pool is active. */
+    public synchronized void shutdown() {
+        shutdownThreadPool();
+    }
+}
diff --git a/orchid/.gitignore b/orchid/.gitignore
new file mode 100644
index 00000000..f58915aa
--- /dev/null
+++ b/orchid/.gitignore
@@ -0,0 +1,5 @@
+bin/
+orchid-*.jar
+orchid-*.zip
+build-revision
+lib/xmlrpc-*
diff --git a/orchid/LICENSE b/orchid/LICENSE
new file mode 100644
index 00000000..2738761a
--- /dev/null
+++ b/orchid/LICENSE
@@ -0,0 +1,25 @@
+Copyright (c) 2009-2011, Bruce Leidl
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+ * Neither the name of the author nor the
+ names of its contributors may be used to endorse or promote products
+ derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
diff --git a/orchid/README b/orchid/README
new file mode 100644
index 00000000..e69de29b
diff --git a/orchid/build.xml b/orchid/build.xml
new file mode 100644
index 00000000..8aa956c1
--- /dev/null
+++ b/orchid/build.xml
@@ -0,0 +1,116 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/orchid/data/GeoIP.dat b/orchid/data/GeoIP.dat
new file mode 100644
index 00000000..fe563297
Binary files /dev/null and b/orchid/data/GeoIP.dat differ
diff --git a/orchid/data/README b/orchid/data/README
new file mode 100644
index 00000000..3eccbba5
--- /dev/null
+++ b/orchid/data/README
@@ -0,0 +1,3 @@
+GeoIP.dat GeoLite Country database downloaded September, 2013
+
+ http://geolite.maxmind.com/download/geoip/database/GeoLiteCountry/GeoIP.dat.gz
diff --git a/orchid/doc/spec/address-spec.txt b/orchid/doc/spec/address-spec.txt
new file mode 100644
index 00000000..2e1aff2b
--- /dev/null
+++ b/orchid/doc/spec/address-spec.txt
@@ -0,0 +1,58 @@
+
+ Special Hostnames in Tor
+ Nick Mathewson
+
+1. Overview
+
+ Most of the time, Tor treats user-specified hostnames as opaque: When
+ the user connects to www.torproject.org, Tor picks an exit node and uses
+ that node to connect to "www.torproject.org". Some hostnames, however,
+ can be used to override Tor's default behavior and circuit-building
+ rules.
+
+ These hostnames can be passed to Tor as the address part of a SOCKS4a or
+ SOCKS5 request. If the application is connected to Tor using an IP-only
+ method (such as SOCKS4, TransPort, or NatdPort), these hostnames can be
+ substituted for certain IP addresses using the MapAddress configuration
+ option or the MAPADDRESS control command.
+
+2. .exit
+
+ SYNTAX: [hostname].[name-or-digest].exit
+ [name-or-digest].exit
+
+ Hostname is a valid hostname; [name-or-digest] is either the nickname of a
+ Tor node or the hex-encoded digest of that node's public key.
+
+ When Tor sees an address in this format, it uses the node identified by
+ [name-or-digest] as the exit node and connects to the given hostname from
+ it. If no "hostname" component is given, Tor defaults to the published
+ IPv4 address of the exit node.
+
+ It is valid to try to resolve hostnames, and in fact upon success Tor
+ will cache an internal mapaddress of the form
+ "www.google.com.foo.exit=64.233.161.99.foo.exit" to speed subsequent
+ lookups.
+
+ The .exit notation is disabled by default as of Tor 0.2.2.1-alpha, due
+ to potential application-level attacks.
+
+ EXAMPLES:
+ www.example.com.exampletornode.exit
+
+ Connect to www.example.com from the node called "exampletornode".
+
+ exampletornode.exit
+
+ Connect to the published IP address of "exampletornode" using
+ "exampletornode" as the exit.
+
+3. .onion
+
+ SYNTAX: [digest].onion
+
+ The digest is the first eighty bits of a SHA1 hash of the identity key for
+ a hidden service, encoded in base32.
+
+ When Tor sees an address in this format, it tries to look up and connect to
+ the specified hidden service. See rend-spec.txt for full details.
+
diff --git a/orchid/doc/spec/bridges-spec.txt b/orchid/doc/spec/bridges-spec.txt
new file mode 100644
index 00000000..64711881
--- /dev/null
+++ b/orchid/doc/spec/bridges-spec.txt
@@ -0,0 +1,249 @@
+
+ Tor bridges specification
+
+0. Preface
+
+ This document describes the design decisions around support for bridge
+ users, bridge relays, and bridge authorities. It acts as an overview
+ of the bridge design and deployment for developers, and it also tries
+ to point out limitations in the current design and implementation.
+
+ For more details on what all of these mean, look at blocking.tex in
+ /doc/design-paper/
+
+1. Bridge relays
+
+ Bridge relays are just like normal Tor relays except they don't publish
+ their server descriptors to the main directory authorities.
+
+1.1. PublishServerDescriptor
+
+ To configure your relay to be a bridge relay, just add
+ BridgeRelay 1
+ PublishServerDescriptor bridge
+ to your torrc. This will cause your relay to publish its descriptor
+ to the bridge authorities rather than to the default authorities.
+
+ Alternatively, you can say
+ BridgeRelay 1
+ PublishServerDescriptor 0
+ which will cause your relay to not publish anywhere. This could be
+ useful for private bridges.
+
+1.2. Recommendations.
+
+ Bridge relays should use an exit policy of "reject *:*". This is
+ because they only need to relay traffic between the bridge users
+ and the rest of the Tor network, so there's no need to let people
+ exit directly from them.
+
+ We invented the RelayBandwidth* options for this situation: Tor clients
+ who want to allow relaying too. See proposal 111 for details. Relay
+ operators should feel free to rate-limit their relayed traffic.
+
+1.3. Implementation note.
+
+ Vidalia 0.0.15 has turned its "Relay" settings page into a tri-state
+ "Don't relay" / "Relay for the Tor network" / "Help censored users".
+
+ If you click the third choice, it forces your exit policy to reject *:*.
+
+ If all the bridges end up on port 9001, that's not so good. On the
+ other hand, putting the bridges on a low-numbered port in the Unix
+ world requires jumping through extra hoops. The current compromise is
+ that Vidalia makes the ORPort default to 443 on Windows, and 9001 on
+ other platforms.
+
+ At the bottom of the relay config settings window, Vidalia displays
+ the bridge identifier to the operator (see Section 3.1) so he can pass
+ it on to bridge users.
+
+2. Bridge authorities.
+
+ Bridge authorities are like normal v3 directory authorities, except
+ they don't create their own network-status documents or votes. So if
+ you ask a bridge authority for a network-status document or consensus,
+ they behave like a directory mirror: they give you one from one of
+ the main authorities. But if you ask the bridge authority for the
+ descriptor corresponding to a particular identity fingerprint, it will
+ happily give you the latest descriptor for that fingerprint.
+
+ To become a bridge authority, add these lines to your torrc:
+ AuthoritativeDirectory 1
+ BridgeAuthoritativeDir 1
+
+ Right now there's one bridge authority, running on the Tonga relay.
+
+2.1. Exporting bridge-purpose descriptors
+
+ We've added a new purpose for server descriptors: the "bridge"
+ purpose. With the new router-descriptors file format that includes
+ annotations, it's easy to look through it and find the bridge-purpose
+ descriptors.
+
+ Currently we export the bridge descriptors from Tonga to the
+ BridgeDB server, so it can give them out according to the policies
+ in blocking.pdf.
+
+2.2. Reachability/uptime testing
+
+ Right now the bridge authorities do active reachability testing of
+ bridges, so we know which ones to recommend for users.
+
+ But in the design document, we suggested that bridges should publish
+ anonymously (i.e. via Tor) to the bridge authority, so somebody watching
+ the bridge authority can't just enumerate all the bridges. But if we're
+ doing active measurement, the game is up. Perhaps we should back off on
+ this goal, or perhaps we should do our active measurement anonymously?
+
+ Answering this issue is scheduled for 0.2.1.x.
+
+2.3. Future work: migrating to multiple bridge authorities
+
+ Having only one bridge authority is both a trust bottleneck (if you
+ break into one place you learn about every single bridge we've got)
+ and a robustness bottleneck (when it's down, bridge users become sad).
+
+ Right now if we put up a second bridge authority, all the bridges would
+ publish to it, and (assuming the code works) bridge users would query
+ a random bridge authority. This resolves the robustness bottleneck,
+ but makes the trust bottleneck even worse.
+
+ In 0.2.2.x and later we should think about better ways to have multiple
+ bridge authorities.
+
+3. Bridge users.
+
+ Bridge users are like ordinary Tor users except they use encrypted
+ directory connections by default, and they use bridge relays as both
+ entry guards (their first hop) and directory guards (the source of
+ all their directory information).
+
+ To become a bridge user, add the following line to your torrc:
+ UseBridges 1
+
+ and then add at least one "Bridge" line to your torrc based on the
+ format below.
+
+3.1. Format of the bridge identifier.
+
+ The canonical format for a bridge identifier contains an IP address,
+ an ORPort, and an identity fingerprint:
+ bridge 128.31.0.34:9009 4C17 FB53 2E20 B2A8 AC19 9441 ECD2 B017 7B39 E4B1
+
+ However, the identity fingerprint can be left out, in which case the
+ bridge user will connect to that relay and use it as a bridge regardless
+ of what identity key it presents:
+ bridge 128.31.0.34:9009
+ This might be useful for cases where only short bridge identifiers
+ can be communicated to bridge users.
+
+ In a future version we may also support bridge identifiers that are
+ only a key fingerprint:
+ bridge 4C17 FB53 2E20 B2A8 AC19 9441 ECD2 B017 7B39 E4B1
+ and the bridge user can fetch the latest descriptor from the bridge
+ authority (see Section 3.4).
+
+3.2. Bridges as entry guards
+
+ For now, bridge users add their bridge relays to their list of "entry
+ guards" (see path-spec.txt for background on entry guards). They are
+ managed by the entry guard algorithms exactly as if they were a normal
+ entry guard -- their keys and timing get cached in the "state" file,
+ etc. This means that when the Tor user starts up with "UseBridges"
+ disabled, he will skip past the bridge entries since they won't be
+ listed as up and usable in his networkstatus consensus. But to be clear,
+ the "entry_guards" list doesn't currently distinguish guards by purpose.
+
+ Internally, each bridge user keeps a smartlist of "bridge_info_t"
+ that reflects the "bridge" lines from his torrc along with a download
+ schedule (see Section 3.5 below). When he starts Tor, he attempts
+ to fetch a descriptor for each configured bridge (see Section 3.4
+ below). When he succeeds at getting a descriptor for one of the bridges
+ in his list, he adds it directly to the entry guard list using the
+ normal add_an_entry_guard() interface. Once a bridge descriptor has
+ been added, should_delay_dir_fetches() will stop delaying further
+ directory fetches, and the user begins to bootstrap his directory
+ information from that bridge (see Section 3.3).
+
+ Currently bridge users cache their bridge descriptors to the
+ "cached-descriptors" file (annotated with purpose "bridge"), but
+ they don't make any attempt to reuse descriptors they find in this
+ file. The theory is that either the bridge is available now, in which
+ case you can get a fresh descriptor, or it's not, in which case an
+ old descriptor won't do you much good.
+
+ We could disable writing out the bridge lines to the state file, if
+ we think this is a problem.
+
+ As an exception, if we get an application request when we have one
+ or more bridge descriptors but we believe none of them are running,
+ we mark them all as running again. This is similar to the exception
+ already in place to help long-idle Tor clients realize they should
+ fetch fresh directory information rather than just refuse requests.
+
+3.3. Bridges as directory guards
+
+ In addition to using bridges as the first hop in their circuits, bridge
+ users also use them to fetch directory updates. Other than initial
+ bootstrapping to find a working bridge descriptor (see Section 3.4
+ below), all further non-anonymized directory fetches will be redirected
+ to the bridge.
+
+ This means that bridge relays need to have cached answers for all
+ questions the bridge user might ask. This makes the upgrade path
+ tricky --- for example, if we migrate to a v4 directory design, the
+ bridge user would need to keep using v3 so long as his bridge relays
+ only knew how to answer v3 queries.
+
+ In a future design, for cases where the user has enough information
+ to build circuits yet the chosen bridge doesn't know how to answer a
+ given query, we might teach bridge users to make an anonymized request
+ to a more suitable directory server.
+
+3.4. How bridge users get their bridge descriptor
+
+ Bridge users can fetch bridge descriptors in two ways: by going directly
+ to the bridge and asking for "/tor/server/authority", or by going to
+ the bridge authority and asking for "/tor/server/fp/ID". By default,
+ they will only try the direct queries. If the user sets
+ UpdateBridgesFromAuthority 1
+ in his config file, then he will try querying the bridge authority
+ first for bridges where he knows a digest (if he only knows an IP
+ address and ORPort, then his only option is a direct query).
+
+ If the user has at least one working bridge, then he will do further
+ queries to the bridge authority through a full three-hop Tor circuit.
+ But when bootstrapping, he will make a direct begin_dir-style connection
+ to the bridge authority.
+
+ As of Tor 0.2.0.10-alpha, if the user attempts to fetch a descriptor
+ from the bridge authority and it returns a 404 not found, the user
+ will automatically fall back to trying a direct query. Therefore it is
+ recommended that bridge users always set UpdateBridgesFromAuthority,
+ since at worst it will delay their fetches a little bit and notify
+ the bridge authority of the identity fingerprint (but not location)
+ of their intended bridges.
+
+3.5. Bridge descriptor retry schedule
+
+ Bridge users try to fetch a descriptor for each bridge (using the
+ steps in Section 3.4 above) on startup. Whenever they receive a
+ bridge descriptor, they reschedule a new descriptor download for 1
+ hour from then.
+
+ If on the other hand it fails, they try again after 15 minutes for the
+ first attempt, after 15 minutes for the second attempt, and after 60
+ minutes for subsequent attempts.
+
+ In 0.2.2.x we should come up with some smarter retry schedules.
+
+3.6. Implementation note.
+
+ Vidalia 0.1.0 has a new checkbox in its Network config window called
+ "My ISP blocks connections to the Tor network." Users who click that
+ box change their configuration to:
+ UseBridges 1
+ UpdateBridgesFromAuthority 1
+ and should add at least one bridge identifier.
+
diff --git a/orchid/doc/spec/control-spec.txt b/orchid/doc/spec/control-spec.txt
new file mode 100644
index 00000000..1a463afc
--- /dev/null
+++ b/orchid/doc/spec/control-spec.txt
@@ -0,0 +1,1853 @@
+
+ TC: A Tor control protocol (Version 1)
+
+0. Scope
+
+ This document describes an implementation-specific protocol that is used
+ for other programs (such as frontend user-interfaces) to communicate with a
+ locally running Tor process. It is not part of the Tor onion routing
+ protocol.
+
+ This protocol replaces version 0 of TC, which is now deprecated. For
+ reference, version 0 is described in "control-spec-v0.txt". Implementors are
+ recommended to avoid using TC directly, but instead to use a library that
+ can easily be updated to use the newer protocol. (Version 0 is used by Tor
+ versions 0.1.0.x; the protocol in this document only works with Tor
+ versions in the 0.1.1.x series and later.)
+
+1. Protocol outline
+
+ TC is a bidirectional message-based protocol. It assumes an underlying
+ stream for communication between a controlling process (the "client"
+ or "controller") and a Tor process (or "server"). The stream may be
+ implemented via TCP, TLS-over-TCP, a Unix-domain socket, or so on,
+ but it must provide reliable in-order delivery. For security, the
+ stream should not be accessible by untrusted parties.
+
+ In TC, the client and server send typed messages to each other over the
+ underlying stream. The client sends "commands" and the server sends
+ "replies".
+
+ By default, all messages from the server are in response to messages from
+ the client. Some client requests, however, will cause the server to send
+ messages to the client indefinitely far into the future. Such
+ "asynchronous" replies are marked as such.
+
+ Servers respond to messages in the order messages are received.
+
+2. Message format
+
+2.1. Description format
+
+ The message formats listed below use ABNF as described in RFC 2234.
+ The protocol itself is loosely based on SMTP (see RFC 2821).
+
+ We use the following nonterminals from RFC 2822: atom, qcontent
+
+ We define the following general-use nonterminals:
+
+ String = DQUOTE *qcontent DQUOTE
+
+ There are explicitly no limits on line length. All 8-bit characters are
+ permitted unless explicitly disallowed.
+
+ Wherever CRLF is specified to be accepted from the controller, Tor MAY also
+ accept LF. Tor, however, MUST NOT generate LF instead of CRLF.
+ Controllers SHOULD always send CRLF.
+
+2.2. Commands from controller to Tor
+
+ Command = Keyword Arguments CRLF / "+" Keyword Arguments CRLF Data
+ Keyword = 1*ALPHA
+ Arguments = *(SP / VCHAR)
+
+ Specific commands and their arguments are described below in section 3.
+
+2.3. Replies from Tor to the controller
+
+ Reply = SyncReply / AsyncReply
+ SyncReply = *(MidReplyLine / DataReplyLine) EndReplyLine
+ AsyncReply = *(MidReplyLine / DataReplyLine) EndReplyLine
+
+ MidReplyLine = StatusCode "-" ReplyLine
+ DataReplyLine = StatusCode "+" ReplyLine Data
+ EndReplyLine = StatusCode SP ReplyLine
+ ReplyLine = [ReplyText] CRLF
+ ReplyText = XXXX
+ StatusCode = 3DIGIT
+
+ Specific replies are mentioned below in section 3, and described more fully
+ in section 4.
+
+ [Compatibility note: versions of Tor before 0.2.0.3-alpha sometimes
+ generate AsyncReplies of the form "*(MidReplyLine / DataReplyLine)".
+ This is incorrect, but controllers that need to work with these
+ versions of Tor should be prepared to get multi-line AsyncReplies with
+ the final line (usually "650 OK") omitted.]
+
+2.4. General-use tokens
+
+ ; Identifiers for servers.
+ ServerID = Nickname / Fingerprint
+
+ Nickname = 1*19 NicknameChar
+ NicknameChar = "a"-"z" / "A"-"Z" / "0" - "9"
+ Fingerprint = "$" 40*HEXDIG
+
+ ; A "=" indicates that the given nickname is canonical; a "~" indicates
+ ; that the given nickname is not canonical. If no nickname is given at
+ ; all, Tor does not even have a guess for what this router calls itself.
+ LongName = Fingerprint [ ( "=" / "~" ) Nickname ]
+
+ ; How a controller tells Tor about a particular OR. There are four
+ ; possible formats:
+ ; $Digest -- The router whose identity key hashes to the given digest.
+ ; This is the preferred way to refer to an OR.
+ ; $Digest~Name -- The router whose identity key hashes to the given
+ ; digest, but only if the router has the given nickname.
+ ; $Digest=Name -- The router whose identity key hashes to the given
+ ; digest, but only if the router is Named and has the given
+ ; nickname.
+ ; Name -- The Named router with the given nickname, or, if no such
+ ; router exists, any router whose nickname matches the one given.
+ ; This is not a safe way to refer to routers, since Named status
+ ; could under some circumstances change over time.
+ ServerSpec = LongName / Nickname
+
+ ; Unique identifiers for streams or circuits. Currently, Tor only
+ ; uses digits, but this may change
+ StreamID = 1*16 IDChar
+ CircuitID = 1*16 IDChar
+ IDChar = ALPHA / DIGIT
+
+ Address = ip4-address / ip6-address / hostname (XXXX Define these)
+
+ ; A "Data" section is a sequence of octets concluded by the terminating
+ ; sequence CRLF "." CRLF. The terminating sequence may not appear in the
+ ; body of the data. Leading periods on lines in the data are escaped with
+ ; an additional leading period as in RFC 2821 section 4.5.2.
+ Data = *DataLine "." CRLF
+ DataLine = CRLF / "." 1*LineItem CRLF / NonDotItem *LineItem CRLF
+ LineItem = NonCR / 1*CR NonCRLF
+ NonDotItem = NonDotCR / 1*CR NonCRLF
+
+3. Commands
+
+ All commands are case-insensitive, but most keywords are case-sensitive.
+
+3.1. SETCONF
+
+ Change the value of one or more configuration variables. The syntax is:
+
+ "SETCONF" 1*(SP keyword ["=" value]) CRLF
+ value = String / QuotedString
+
+ Tor behaves as though it had just read each of the key-value pairs
+ from its configuration file. Keywords with no corresponding values have
+ their configuration values reset to 0 or NULL (use RESETCONF if you want
+ to set it back to its default). SETCONF is all-or-nothing: if there
+ is an error in any of the configuration settings, Tor sets none of them.
+
+ Tor responds with a "250 configuration values set" reply on success.
+ If some of the listed keywords can't be found, Tor replies with a
+ "552 Unrecognized option" message. Otherwise, Tor responds with a
+ "513 syntax error in configuration values" reply on syntax error, or a
+ "553 impossible configuration setting" reply on a semantic error.
+
+ When a configuration option takes multiple values, or when multiple
+ configuration keys form a context-sensitive group (see GETCONF below), then
+ setting _any_ of the options in a SETCONF command is taken to reset all of
+ the others. For example, if two ORBindAddress values are configured, and a
+ SETCONF command arrives containing a single ORBindAddress value, the new
+ command's value replaces the two old values.
+
+ Sometimes it is not possible to change configuration options solely by
+ issuing a series of SETCONF commands, because the value of one of the
+ configuration options depends on the value of another which has not yet
+ been set. Such situations can be overcome by setting multiple configuration
+ options with a single SETCONF command (e.g. SETCONF ORPort=443
+ ORListenAddress=9001).
+
+3.2. RESETCONF
+
+ Remove all settings for a given configuration option entirely, assign
+ its default value (if any), and then assign the String provided.
+ Typically the String is left empty, to simply set an option back to
+ its default. The syntax is:
+
+ "RESETCONF" 1*(SP keyword ["=" String]) CRLF
+
+ Otherwise it behaves like SETCONF above.
+
+3.3. GETCONF
+
+ Request the value of a configuration variable. The syntax is:
+
+ "GETCONF" 1*(SP keyword) CRLF
+
+ If all of the listed keywords exist in the Tor configuration, Tor replies
+ with a series of reply lines of the form:
+ 250 keyword=value
+ If any option is set to a 'default' value semantically different from an
+ empty string, Tor may reply with a reply line of the form:
+ 250 keyword
+
+ Value may be a raw value or a quoted string. Tor will try to use
+ unquoted values except when the value could be misinterpreted through
+ not being quoted.
+
+ If some of the listed keywords can't be found, Tor replies with a
+ "552 unknown configuration keyword" message.
+
+ If an option appears multiple times in the configuration, all of its
+ key-value pairs are returned in order.
+
+ Some options are context-sensitive, and depend on other options with
+ different keywords. These cannot be fetched directly. Currently there
+ is only one such option: clients should use the "HiddenServiceOptions"
+ virtual keyword to get all HiddenServiceDir, HiddenServicePort,
+ HiddenServiceNodes, and HiddenServiceExcludeNodes option settings.
+
+3.4. SETEVENTS
+
+ Request the server to inform the client about interesting events. The
+ syntax is:
+
+ "SETEVENTS" [SP "EXTENDED"] *(SP EventCode) CRLF
+
+ EventCode = "CIRC" / "STREAM" / "ORCONN" / "BW" / "DEBUG" /
+ "INFO" / "NOTICE" / "WARN" / "ERR" / "NEWDESC" / "ADDRMAP" /
+ "AUTHDIR_NEWDESCS" / "DESCCHANGED" / "STATUS_GENERAL" /
+ "STATUS_CLIENT" / "STATUS_SERVER" / "GUARD" / "NS" / "STREAM_BW" /
+ "CLIENTS_SEEN" / "NEWCONSENSUS"
+
+ Any events *not* listed in the SETEVENTS line are turned off; thus, sending
+ SETEVENTS with an empty body turns off all event reporting.
+
+ The server responds with a "250 OK" reply on success, and a "552
+ Unrecognized event" reply if one of the event codes isn't recognized. (On
+ error, the list of active event codes isn't changed.)
+
+ If the flag string "EXTENDED" is provided, Tor may provide extra
+ information with events for this connection; see 4.1 for more information.
+ NOTE: All events on a given connection will be provided in extended format,
+ or none.
+ NOTE: "EXTENDED" is only supported in Tor 0.1.1.9-alpha or later.
+
+ Each event is described in more detail in Section 4.1.
+
+3.5. AUTHENTICATE
+
+ Sent from the client to the server. The syntax is:
+ "AUTHENTICATE" [ SP 1*HEXDIG / QuotedString ] CRLF
+
+ The server responds with "250 OK" on success or "515 Bad authentication" if
+ the authentication cookie is incorrect. Tor closes the connection on an
+ authentication failure.
+
+ The format of the 'cookie' is implementation-dependent; see 5.1 below for
+ information on how the standard Tor implementation handles it.
+
+ Before the client has authenticated, no command other than PROTOCOLINFO,
+ AUTHENTICATE, or QUIT is valid. If the controller sends any other command,
+ or sends a malformed command, or sends an unsuccessful AUTHENTICATE
+ command, or sends PROTOCOLINFO more than once, Tor sends an error reply and
+ closes the connection.
+
+ To prevent some cross-protocol attacks, the AUTHENTICATE command is still
+ required even if all authentication methods in Tor are disabled. In this
+ case, the controller should just send "AUTHENTICATE" CRLF.
+
+ (Versions of Tor before 0.1.2.16 and 0.2.0.4-alpha did not close the
+ connection after an authentication failure.)
+
+3.6. SAVECONF
+
+ Sent from the client to the server. The syntax is:
+ "SAVECONF" CRLF
+
+ Instructs the server to write out its config options into its torrc. Server
+ returns "250 OK" if successful, or "551 Unable to write configuration
+ to disk" if it can't write the file or some other error occurs.
+
+3.7. SIGNAL
+
+ Sent from the client to the server. The syntax is:
+
+ "SIGNAL" SP Signal CRLF
+
+ Signal = "RELOAD" / "SHUTDOWN" / "DUMP" / "DEBUG" / "HALT" /
+ "HUP" / "INT" / "USR1" / "USR2" / "TERM" / "NEWNYM" /
+ "CLEARDNSCACHE"
+
+ The meanings of the signals are:
+
+ RELOAD -- Reload: reload config items, refetch directory. (like HUP)
+ SHUTDOWN -- Controlled shutdown: if server is an OP, exit immediately.
+ If it's an OR, close listeners and exit after 30 seconds.
+ (like INT)
+ DUMP -- Dump stats: log information about open connections and
+ circuits. (like USR1)
+ DEBUG -- Debug: switch all open logs to loglevel debug. (like USR2)
+ HALT -- Immediate shutdown: clean up and exit now. (like TERM)
+ CLEARDNSCACHE -- Forget the client-side cached IPs for all hostnames.
+ NEWNYM -- Switch to clean circuits, so new application requests
+ don't share any circuits with old ones. Also clears
+ the client-side DNS cache. (Tor MAY rate-limit its
+ response to this signal.)
+
+ The server responds with "250 OK" if the signal is recognized (or simply
+ closes the socket if it was asked to close immediately), or "552
+ Unrecognized signal" if the signal is unrecognized.
+
+3.8. MAPADDRESS
+
+ Sent from the client to the server. The syntax is:
+
+ "MAPADDRESS" 1*(Address "=" Address SP) CRLF
+
+ The first address in each pair is an "original" address; the second is a
+ "replacement" address. The client sends this message to the server in
+ order to tell it that future SOCKS requests for connections to the original
+ address should be replaced with connections to the specified replacement
+ address. If the addresses are well-formed, and the server is able to
+ fulfill the request, the server replies with a 250 message:
+ 250-OldAddress1=NewAddress1
+ 250 OldAddress2=NewAddress2
+
+ containing the source and destination addresses. If the request is
+ malformed, the server replies with "512 syntax error in command
+ argument". If the server can't fulfill the request, it replies with
+ "451 resource exhausted".
+
+ The client may decline to provide a body for the original address, and
+ instead send a special null address ("0.0.0.0" for IPv4, "::0" for IPv6, or
+ "." for hostname), signifying that the server should choose the original
+ address itself, and return that address in the reply. The server
+ should ensure that it returns an element of address space that is unlikely
+ to be in actual use. If there is already an address mapped to the
+ destination address, the server may reuse that mapping.
+
+ If the original address is already mapped to a different address, the old
+ mapping is removed. If the original address and the destination address
+ are the same, the server removes any mapping in place for the original
+ address.
+
+ Example:
+ C: MAPADDRESS 0.0.0.0=torproject.org 1.2.3.4=tor.freehaven.net
+ S: 250-127.192.10.10=torproject.org
+ S: 250 1.2.3.4=tor.freehaven.net
+
+ {Note: This feature is designed to be used to help Tor-ify applications
+ that need to use SOCKS4 or hostname-less SOCKS5. There are three
+ approaches to doing this:
+ 1. Somehow make them use SOCKS4a or SOCKS5-with-hostnames instead.
+ 2. Use tor-resolve (or another interface to Tor's resolve-over-SOCKS
+ feature) to resolve the hostname remotely. This doesn't work
+ with special addresses like x.onion or x.y.exit.
+ 3. Use MAPADDRESS to map an IP address to the desired hostname, and then
+ arrange to fool the application into thinking that the hostname
+ has resolved to that IP.
+ This functionality is designed to help implement the 3rd approach.}
+
+ Mappings set by the controller last until the Tor process exits:
+ they never expire. If the controller wants the mapping to last only
+ a certain time, then it must explicitly un-map the address when that
+ time has elapsed.
+
+3.9. GETINFO
+
+ Sent from the client to the server. The syntax is as for GETCONF:
+ "GETINFO" 1*(SP keyword) CRLF
+ one or more NL-terminated strings. The server replies with an INFOVALUE
+ message, or a 551 or 552 error.
+
+ Unlike GETCONF, this message is used for data that are not stored in the Tor
+ configuration file, and that may be longer than a single line. On success,
+ one ReplyLine is sent for each requested value, followed by a final 250 OK
+ ReplyLine. If a value fits on a single line, the format is:
+ 250-keyword=value
+ If a value must be split over multiple lines, the format is:
+ 250+keyword=
+ value
+ .
+ Recognized keys and their values include:
+
+ "version" -- The version of the server's software, including the name
+ of the software. (example: "Tor 0.0.9.4")
+
+ "config-file" -- The location of Tor's configuration file ("torrc").
+
+ ["exit-policy/prepend" -- The default exit policy lines that Tor will
+ *prepend* to the ExitPolicy config option.
+ -- Never implemented. Useful?]
+
+ "exit-policy/default" -- The default exit policy lines that Tor will
+ *append* to the ExitPolicy config option.
+
+ "desc/id/<OR identity>" or "desc/name/<OR nickname>" -- the latest
+ server descriptor for a given OR, NUL-terminated.
+
+ "desc-annotations/id/<OR identity>" -- outputs the annotations string
+ (source, timestamp of arrival, purpose, etc) for the corresponding
+ descriptor. [First implemented in 0.2.0.13-alpha.]
+
+ "extra-info/digest/<digest>" -- the extrainfo document whose digest (in
+ hex) is <digest>. Only available if we're downloading extra-info
+ documents.
+
+ "ns/id/<OR identity>" or "ns/name/<OR nickname>" -- the latest router
+ status info (v2 directory style) for a given OR. Router status
+ info is as given in
+ dir-spec.txt, and reflects the current beliefs of this Tor about the
+ router in question. Like directory clients, controllers MUST
+ tolerate unrecognized flags and lines. The published date and
+ descriptor digest are those believed to be best by this Tor,
+ not necessarily those for a descriptor that Tor currently has.
+ [First implemented in 0.1.2.3-alpha.]
+
+ "ns/all" -- Router status info (v2 directory style) for all ORs we
+ have an opinion about, joined by newlines. [First implemented
+ in 0.1.2.3-alpha.]
+
+ "ns/purpose/<purpose>" -- Router status info (v2 directory style)
+ for all ORs of this purpose. Mostly designed for /ns/purpose/bridge
+ queries. [First implemented in 0.2.0.13-alpha.]
+
+ "desc/all-recent" -- the latest server descriptor for every router that
+ Tor knows about.
+
+ "network-status" -- a space-separated list (v1 directory style)
+ of all known OR identities. This is in the same format as the
+ router-status line in v1 directories; see dir-spec-v1.txt section
+ 3 for details. (If VERBOSE_NAMES is enabled, the output will
+ not conform to dir-spec-v1.txt; instead, the result will be a
+ space-separated list of LongName, each preceded by a "!" if it is
+ believed to be not running.) This option is deprecated; use
+ "ns/all" instead.
+
+ "address-mappings/all"
+ "address-mappings/config"
+ "address-mappings/cache"
+ "address-mappings/control" -- a \r\n-separated list of address
+ mappings, each in the form of "from-address to-address expiry".
+ The 'config' key returns those address mappings set in the
+ configuration; the 'cache' key returns the mappings in the
+ client-side DNS cache; the 'control' key returns the mappings set
+ via the control interface; the 'all' target returns the mappings
+ set through any mechanism.
+ Expiry is formatted as with ADDRMAP events, except that "expiry" is
+ always a time in GMT or the string "NEVER"; see section 4.1.7.
+ First introduced in 0.2.0.3-alpha.
+
+ "addr-mappings/*" -- as for address-mappings/*, but without the
+ expiry portion of the value. Use of this value is deprecated
+ since 0.2.0.3-alpha; use address-mappings instead.
+
+ "address" -- the best guess at our external IP address. If we
+ have no guess, return a 551 error. (Added in 0.1.2.2-alpha)
+
+ "fingerprint" -- the contents of the fingerprint file that Tor
+ writes as a server, or a 551 if we're not a server currently.
+ (Added in 0.1.2.3-alpha)
+
+ "circuit-status"
+ A series of lines as for a circuit status event. Each line is of
+ the form:
+ CircuitID SP CircStatus [SP Path] CRLF
+
+ "stream-status"
+ A series of lines as for a stream status event. Each is of the form:
+ StreamID SP StreamStatus SP CircID SP Target CRLF
+
+ "orconn-status"
+ A series of lines as for an OR connection status event. Each is of the
+ form:
+ ServerID SP ORStatus CRLF
+
+ "entry-guards"
+ A series of lines listing the currently chosen entry guards, if any.
+ Each is of the form:
+ ServerID2 SP Status [SP ISOTime] CRLF
+
+ Status-with-time = ("unlisted") SP ISOTime
+ Status = ("up" / "never-connected" / "down" /
+ "unusable" / "unlisted" )
+
+ ServerID2 = Nickname / 40*HEXDIG
+
+ [From 0.1.1.4-alpha to 0.1.1.10-alpha, this was called "helper-nodes".
+ Tor still supports calling it that for now, but support will be
+ removed in 0.1.3.x.]
+
+ [Older versions of Tor (before 0.1.2.x-final) generated 'down' instead
+ of unlisted/unusable. Current Tors never generate 'down'.]
+
+ [XXXX ServerID2 differs from ServerID in not prefixing fingerprints
+ with a $. This is an implementation error. It would be nice to add
+ the $ back in if we can do so without breaking compatibility.]
+
+ "accounting/enabled"
+ "accounting/hibernating"
+ "accounting/bytes"
+ "accounting/bytes-left"
+ "accounting/interval-start"
+ "accounting/interval-wake"
+ "accounting/interval-end"
+ Information about accounting status. If accounting is enabled,
+ "enabled" is 1; otherwise it is 0. The "hibernating" field is "hard"
+ if we are accepting no data; "soft" if we're accepting no new
+ connections, and "awake" if we're not hibernating at all. The "bytes"
+ and "bytes-left" fields contain (read-bytes SP write-bytes), for the
+ start and the rest of the interval respectively. The 'interval-start'
+ and 'interval-end' fields are the borders of the current interval; the
+ 'interval-wake' field is the time within the current interval (if any)
+ where we plan[ned] to start being active. The times are GMT.
+
+ "config/names"
+ A series of lines listing the available configuration options. Each is
+ of the form:
+ OptionName SP OptionType [ SP Documentation ] CRLF
+ OptionName = Keyword
+ OptionType = "Integer" / "TimeInterval" / "DataSize" / "Float" /
+ "Boolean" / "Time" / "CommaList" / "Dependant" / "Virtual" /
+ "String" / "LineList"
+ Documentation = Text
+
+ "info/names"
+ A series of lines listing the available GETINFO options. Each is of
+ one of these forms:
+ OptionName SP Documentation CRLF
+ OptionPrefix SP Documentation CRLF
+ OptionPrefix = OptionName "/*"
+
+ "events/names"
+ A space-separated list of all the events supported by this version of
+ Tor's SETEVENTS.
+
+ "features/names"
+ A space-separated list of all the features supported by this version of
+ Tor's USEFEATURE.
+
+ "ip-to-country/*"
+ Maps IP addresses to 2-letter country codes. For example,
+ "GETINFO ip-to-country/18.0.0.1" should give "US".
+
+ "next-circuit/IP:port"
+ XXX todo.
+
+ "dir/status-vote/current/consensus" [added in Tor 0.2.1.6-alpha]
+ "dir/status/authority"
+ "dir/status/fp/<F>"
+ "dir/status/fp/<F1>+<F2>+<F3>"
+ "dir/status/all"
+ "dir/server/fp/<F>"
+ "dir/server/fp/<F1>+<F2>+<F3>"
+ "dir/server/d/<D>"
+ "dir/server/d/<D1>+<D2>+<D3>"
+ "dir/server/authority"
+ "dir/server/all"
+ A series of lines listing directory contents, provided according to the
+ specification for the URLs listed in Section 4.4 of dir-spec.txt. Note
+ that Tor MUST NOT provide private information, such as descriptors for
+ routers not marked as general-purpose. When asked for 'authority'
+ information for which this Tor is not authoritative, Tor replies with
+ an empty string.
+
+ "status/circuit-established"
+ "status/enough-dir-info"
+ "status/good-server-descriptor"
+ "status/accepted-server-descriptor"
+ "status/..."
+ These provide the current internal Tor values for various Tor
+ states. See Section 4.1.10 for explanations. (Only a few of the
+ status events are available as getinfo's currently. Let us know if
+ you want more exposed.)
+ "status/reachability-succeeded/or"
+ 0 or 1, depending on whether we've found our ORPort reachable.
+ "status/reachability-succeeded/dir"
+ 0 or 1, depending on whether we've found our DirPort reachable.
+ "status/reachability-succeeded"
+ "OR=" ("0"/"1") SP "DIR=" ("0"/"1")
+ Combines status/reachability-succeeded/*; controllers MUST ignore
+ unrecognized elements in this entry.
+ "status/bootstrap-phase"
+ Returns the most recent bootstrap phase status event
+ sent. Specifically, it returns a string starting with either
+ "NOTICE BOOTSTRAP ..." or "WARN BOOTSTRAP ...". Controllers should
+ use this getinfo when they connect or attach to Tor to learn its
+ current bootstrap state.
+ "status/version/recommended"
+ List of currently recommended versions.
+ "status/version/current"
+ Status of the current version. One of: new, old, unrecommended,
+ recommended, new in series, obsolete.
+ "status/clients-seen"
+ A summary of which countries we've seen clients from recently,
+ formatted the same as the CLIENTS_SEEN status event described in
+ Section 4.1.14. This GETINFO option is currently available only
+ for bridge relays.
+
+ Examples:
+ C: GETINFO version desc/name/moria1
+ S: 250+desc/name/moria1=
+ S: [Descriptor for moria1]
+ S: .
+ S: 250-version=Tor 0.1.1.0-alpha-cvs
+ S: 250 OK
+
+3.10. EXTENDCIRCUIT
+
+ Sent from the client to the server. The format is:
+ "EXTENDCIRCUIT" SP CircuitID SP
+ ServerSpec *("," ServerSpec)
+ [SP "purpose=" Purpose] CRLF
+
+ This request takes one of two forms: either the CircuitID is zero, in
+ which case it is a request for the server to build a new circuit according
+ to the specified path, or the CircuitID is nonzero, in which case it is a
+ request for the server to extend an existing circuit with that ID according
+ to the specified path.
+
+ If CircuitID is 0 and "purpose=" is specified, then the circuit's
+ purpose is set. Two choices are recognized: "general" and
+ "controller". If not specified, circuits are created as "general".
+
+ If the request is successful, the server sends a reply containing a
+ message body consisting of the CircuitID of the (maybe newly created)
+ circuit. The syntax is "250" SP "EXTENDED" SP CircuitID CRLF.
+
+3.11. SETCIRCUITPURPOSE
+
+ Sent from the client to the server. The format is:
+ "SETCIRCUITPURPOSE" SP CircuitID SP Purpose CRLF
+
+ This changes the circuit's purpose. See EXTENDCIRCUIT above for details.
+
+3.12. SETROUTERPURPOSE
+
+ Sent from the client to the server. The format is:
+ "SETROUTERPURPOSE" SP NicknameOrKey SP Purpose CRLF
+
+ This changes the descriptor's purpose. See +POSTDESCRIPTOR below
+ for details.
+
+ NOTE: This command was disabled and made obsolete as of Tor
+ 0.2.0.8-alpha. It doesn't exist anymore, and is listed here only for
+ historical interest.
+
+3.13. ATTACHSTREAM
+
+ Sent from the client to the server. The syntax is:
+ "ATTACHSTREAM" SP StreamID SP CircuitID [SP "HOP=" HopNum] CRLF
+
+ This message informs the server that the specified stream should be
+ associated with the specified circuit. Each stream may be associated with
+ at most one circuit, and multiple streams may share the same circuit.
+ Streams can only be attached to completed circuits (that is, circuits that
+ have sent a circuit status 'BUILT' event or are listed as built in a
+ GETINFO circuit-status request).
+
+ If the circuit ID is 0, responsibility for attaching the given stream is
+ returned to Tor.
+
+ If HOP=HopNum is specified, Tor will choose the HopNumth hop in the
+ circuit as the exit node, rather than the last node in the circuit.
+ Hops are 1-indexed; generally, it is not permitted to attach to hop 1.
+
+ Tor responds with "250 OK" if it can attach the stream, 552 if the circuit
+ or stream didn't exist, or 551 if the stream couldn't be attached for
+ another reason.
+
+ {Implementation note: Tor will close unattached streams by itself,
+ roughly two minutes after they are born. Let the developers know if
+ that turns out to be a problem.}
+
+ {Implementation note: By default, Tor automatically attaches streams to
+ circuits itself, unless the configuration variable
+ "__LeaveStreamsUnattached" is set to "1". Attempting to attach streams
+ via TC when "__LeaveStreamsUnattached" is false may cause a race between
+ Tor and the controller, as both attempt to attach streams to circuits.}
+
+ {Implementation note: You can try to attachstream to a stream that
+ has already sent a connect or resolve request but hasn't succeeded
+ yet, in which case Tor will detach the stream from its current circuit
+ before proceeding with the new attach request.}
+
+3.14. POSTDESCRIPTOR
+
+ Sent from the client to the server. The syntax is:
+ "+POSTDESCRIPTOR" [SP "purpose=" Purpose] [SP "cache=" Cache]
+ CRLF Descriptor CRLF "." CRLF
+
+ This message informs the server about a new descriptor. If Purpose is
+ specified, it must be either "general", "controller", or "bridge",
+ else we return a 552 error. The default is "general".
+
+ If Cache is specified, it must be either "no" or "yes", else we
+ return a 552 error. If Cache is not specified, Tor will decide for
+ itself whether it wants to cache the descriptor, and controllers
+ must not rely on its choice.
+
+ The descriptor, when parsed, must contain a number of well-specified
+ fields, including fields for its nickname and identity.
+
+ If there is an error in parsing the descriptor, the server must send a
+ "554 Invalid descriptor" reply. If the descriptor is well-formed but
+ the server chooses not to add it, it must reply with a 251 message
+ whose body explains why the server was not added. If the descriptor
+ is added, Tor replies with "250 OK".
+
+3.15. REDIRECTSTREAM
+
+ Sent from the client to the server. The syntax is:
+ "REDIRECTSTREAM" SP StreamID SP Address [SP Port] CRLF
+
+ Tells the server to change the exit address on the specified stream. If
+ Port is specified, changes the destination port as well. No remapping
+ is performed on the new provided address.
+
+ To be sure that the modified address will be used, this event must be sent
+ after a new stream event is received, and before attaching this stream to
+ a circuit.
+
+ Tor replies with "250 OK" on success.
+
+3.16. CLOSESTREAM
+
+ Sent from the client to the server. The syntax is:
+
+ "CLOSESTREAM" SP StreamID SP Reason *(SP Flag) CRLF
+
+ Tells the server to close the specified stream. The reason should be one
+ of the Tor RELAY_END reasons given in tor-spec.txt, as a decimal. Flags are
+ not used currently; Tor servers SHOULD ignore unrecognized flags. Tor may
+ hold the stream open for a while to flush any data that is pending.
+
+ Tor replies with "250 OK" on success, or a 512 if there aren't enough
+ arguments, or a 552 if it doesn't recognize the StreamID or reason.
+
+3.17. CLOSECIRCUIT
+
+ The syntax is:
+ CLOSECIRCUIT SP CircuitID *(SP Flag) CRLF
+ Flag = "IfUnused"
+
+ Tells the server to close the specified circuit. If "IfUnused" is
+ provided, do not close the circuit unless it is unused.
+
+ Other flags may be defined in the future; Tor SHOULD ignore unrecognized
+ flags.
+
+ Tor replies with "250 OK" on success, or a 512 if there aren't enough
+ arguments, or a 552 if it doesn't recognize the CircuitID.
+
+3.18. QUIT
+
+ Tells the server to hang up on this controller connection. This command
+ can be used before authenticating.
+
+3.19. USEFEATURE
+
+ The syntax is:
+
+ "USEFEATURE" *(SP FeatureName) CRLF
+ FeatureName = 1*(ALPHA / DIGIT / "_" / "-")
+
+ Sometimes extensions to the controller protocol break compatibility with
+ older controllers. In this case, whenever possible, the extensions are
+ first included in Tor disabled by default, and only enabled on a given
+ controller connection when the "USEFEATURE" command is given. Once a
+ "USEFEATURE" command is given, it applies to all subsequent interactions on
+ the same connection; to disable an enabled feature, a new controller
+ connection must be opened.
+
+ This is a forward-compatibility mechanism; each feature will eventually
+ become a regular part of the control protocol in some future version of Tor.
+ Tor will ignore a request to use any feature that is already on by default.
+ Tor will give a "552" error if any requested feature is not recognized.
+
+ Feature names are case-insensitive.
+
+ EXTENDED_EVENTS
+
+ Same as passing 'EXTENDED' to SETEVENTS; this is the preferred way to
+ request the extended event syntax.
+
+ This feature was first used in 0.1.2.3-alpha. It is always-on in
+ Tor 0.2.2.1-alpha and later.
+
+ VERBOSE_NAMES
+
+ Instead of ServerID as specified above, the controller should
+ identify ORs by LongName in events and GETINFO results. This format is
+ strictly more informative: rather than including Nickname for
+ known Named routers and Fingerprint for unknown or unNamed routers, the
+ LongName format includes a Fingerprint, an indication of Named status,
+ and a Nickname (if one is known).
+
+ This will not be always-enabled until at least two stable
+ releases after 0.1.2.2-alpha, the release where it was first
+ available. It is always-on in Tor 0.2.2.1-alpha and later.
+
+3.20. RESOLVE
+
+ The syntax is
+ "RESOLVE" *Option *Address CRLF
+ Option = "mode=reverse"
+ Address = a hostname or IPv4 address
+
+ This command launches a remote hostname lookup request for every specified
+ address (or reverse lookup if "mode=reverse" is specified). Note that the
+ request is done in the background: to see the answers, your controller will
+ need to listen for ADDRMAP events; see 4.1.7 below.
+
+ [Added in Tor 0.2.0.3-alpha]
+
+3.21. PROTOCOLINFO
+
+ The syntax is:
+ "PROTOCOLINFO" *(SP PIVERSION) CRLF
+
+ The server reply format is:
+ "250-PROTOCOLINFO" SP PIVERSION CRLF *InfoLine "250 OK" CRLF
+
+ InfoLine = AuthLine / VersionLine / OtherLine
+
+ AuthLine = "250-AUTH" SP "METHODS=" AuthMethod *("," AuthMethod)
+ *(SP "COOKIEFILE=" AuthCookieFile) CRLF
+ VersionLine = "250-VERSION" SP "Tor=" TorVersion [SP Arguments] CRLF
+
+ AuthMethod =
+ "NULL" / ; No authentication is required
+ "HASHEDPASSWORD" / ; A controller must supply the original password
+ "COOKIE" ; A controller must supply the contents of a cookie
+
+ AuthCookieFile = QuotedString
+ TorVersion = QuotedString
+
+ OtherLine = "250-" Keyword [SP Arguments] CRLF
+
+ PIVERSION: 1*DIGIT
+
+ Tor MAY give its InfoLines in any order; controllers MUST ignore InfoLines
+ with keywords they do not recognize. Controllers MUST ignore extraneous
+ data on any InfoLine.
+
+ PIVERSION is there in case we drastically change the syntax one day. For
+ now it should always be "1". Controllers MAY provide a list of the
+ protocolinfo versions they support; Tor MAY select a version that the
+ controller does not support.
+
+ AuthMethod is used to specify one or more control authentication
+ methods that Tor currently accepts.
+
+ AuthCookieFile specifies the absolute path and filename of the
+ authentication cookie that Tor is expecting and is provided iff
+ the METHODS field contains the method "COOKIE". Controllers MUST handle
+ escape sequences inside this string.
+
+ The VERSION line contains the Tor version.
+
+ [Unlike other commands besides AUTHENTICATE, PROTOCOLINFO may be used (but
+ only once!) before AUTHENTICATE.]
+
+ [PROTOCOLINFO was not supported before Tor 0.2.0.5-alpha.]
+
+4. Replies
+
+ Reply codes follow the same 3-character format as used by SMTP, with the
+ first character defining a status, the second character defining a
+ subsystem, and the third designating fine-grained information.
+
+ The TC protocol currently uses the following first characters:
+
+ 2yz Positive Completion Reply
+ The command was successful; a new request can be started.
+
+ 4yz Temporary Negative Completion reply
+ The command was unsuccessful but might be reattempted later.
+
+ 5yz Permanent Negative Completion Reply
+ The command was unsuccessful; the client should not try exactly
+ that sequence of commands again.
+
+ 6yz Asynchronous Reply
+ Sent out-of-order in response to an earlier SETEVENTS command.
+
+ The following second characters are used:
+
+ x0z Syntax
+ Sent in response to ill-formed or nonsensical commands.
+
+ x1z Protocol
+ Refers to operations of the Tor Control protocol.
+
+ x5z Tor
+ Refers to actual operations of the Tor system.
+
+ The following codes are defined:
+
+ 250 OK
+ 251 Operation was unnecessary
+ [Tor has declined to perform the operation, but no harm was done.]
+
+ 451 Resource exhausted
+
+ 500 Syntax error: protocol
+
+ 510 Unrecognized command
+ 511 Unimplemented command
+ 512 Syntax error in command argument
+ 513 Unrecognized command argument
+ 514 Authentication required
+ 515 Bad authentication
+
+ 550 Unspecified Tor error
+
+ 551 Internal error
+ [Something went wrong inside Tor, so that the client's
+ request couldn't be fulfilled.]
+
+ 552 Unrecognized entity
+ [A configuration key, stream ID, circuit ID, or event
+ mentioned in the command did not actually exist.]
+
+ 553 Invalid configuration value
+ [The client tried to set a configuration option to an
+ incorrect, ill-formed, or impossible value.]
+
+ 554 Invalid descriptor
+
+ 555 Unmanaged entity
+
+ 650 Asynchronous event notification
+
+ Unless specified to have specific contents, the human-readable messages
+ in error replies should not be relied upon to match those in this document.
+
+4.1. Asynchronous events
+
+ These replies can be sent after a corresponding SETEVENTS command has been
+ received. They will not be interleaved with other Reply elements, but they
+ can appear between a command and its corresponding reply. For example,
+ this sequence is possible:
+
+ C: SETEVENTS CIRC
+ S: 250 OK
+ C: GETCONF SOCKSPORT ORPORT
+ S: 650 CIRC 1000 EXTENDED moria1,moria2
+ S: 250-SOCKSPORT=9050
+ S: 250 ORPORT=0
+
+ But this sequence is disallowed:
+ C: SETEVENTS CIRC
+ S: 250 OK
+ C: GETCONF SOCKSPORT ORPORT
+ S: 250-SOCKSPORT=9050
+ S: 650 CIRC 1000 EXTENDED moria1,moria2
+ S: 250 ORPORT=0
+
+ Clients MUST tolerate more arguments in an asynchronous reply than
+ expected, and MUST tolerate more lines in an asynchronous reply than
+ expected. For instance, a client that expects a CIRC message like:
+ 650 CIRC 1000 EXTENDED moria1,moria2
+ must tolerate:
+ 650-CIRC 1000 EXTENDED moria1,moria2 0xBEEF
+ 650-EXTRAMAGIC=99
+ 650 ANONYMITY=high
+
+ If clients ask for extended events, then each event line as specified below
+ will be followed by additional extensions. Additional lines will be of the
+ form
+ "650" ("-"/" ") KEYWORD ["=" ARGUMENTS] CRLF
+ Additional arguments will be of the form
+ SP KEYWORD ["=" ( QuotedString / * NonSpDquote ) ]
+ Such clients MUST tolerate lines with keywords they do not recognize.
+
+4.1.1. Circuit status changed
+
+ The syntax is:
+
+ "650" SP "CIRC" SP CircuitID SP CircStatus [SP Path]
+ [SP "REASON=" Reason [SP "REMOTE_REASON=" Reason]] CRLF
+
+ CircStatus =
+ "LAUNCHED" / ; circuit ID assigned to new circuit
+ "BUILT" / ; all hops finished, can now accept streams
+ "EXTENDED" / ; one more hop has been completed
+ "FAILED" / ; circuit closed (was not built)
+ "CLOSED" ; circuit closed (was built)
+
+ Path = ServerID *("," ServerID)
+
+ Reason = "NONE" / "TORPROTOCOL" / "INTERNAL" / "REQUESTED" /
+ "HIBERNATING" / "RESOURCELIMIT" / "CONNECTFAILED" /
+ "OR_IDENTITY" / "OR_CONN_CLOSED" / "TIMEOUT" /
+ "FINISHED" / "DESTROYED" / "NOPATH" / "NOSUCHSERVICE"
+
+ The path is provided only when the circuit has been extended at least one
+ hop.
+
+ The "REASON" field is provided only for FAILED and CLOSED events, and only
+ if extended events are enabled (see 3.19). Clients MUST accept reasons
+ not listed above. Reasons are as given in tor-spec.txt, except for:
+
+ NOPATH (Not enough nodes to make circuit)
+
+ The "REMOTE_REASON" field is provided only when we receive a DESTROY or
+ TRUNCATE cell, and only if extended events are enabled. It contains the
+ actual reason given by the remote OR for closing the circuit. Clients MUST
+ accept reasons not listed above. Reasons are as listed in tor-spec.txt.
+
+4.1.2. Stream status changed
+
+ The syntax is:
+
+ "650" SP "STREAM" SP StreamID SP StreamStatus SP CircID SP Target
+ [SP "REASON=" Reason [ SP "REMOTE_REASON=" Reason ]]
+ [SP "SOURCE=" Source] [ SP "SOURCE_ADDR=" Address ":" Port ]
+ [SP "PURPOSE=" Purpose]
+ CRLF
+
+ StreamStatus =
+ "NEW" / ; New request to connect
+ "NEWRESOLVE" / ; New request to resolve an address
+ "REMAP" / ; Address re-mapped to another
+ "SENTCONNECT" / ; Sent a connect cell along a circuit
+ "SENTRESOLVE" / ; Sent a resolve cell along a circuit
+ "SUCCEEDED" / ; Received a reply; stream established
+ "FAILED" / ; Stream failed and not retriable
+ "CLOSED" / ; Stream closed
+ "DETACHED" ; Detached from circuit; still retriable
+
+ Target = Address ":" Port
+
+ The circuit ID designates which circuit this stream is attached to. If
+ the stream is unattached, the circuit ID "0" is given.
+
+ Reason = "MISC" / "RESOLVEFAILED" / "CONNECTREFUSED" /
+ "EXITPOLICY" / "DESTROY" / "DONE" / "TIMEOUT" /
+ "HIBERNATING" / "INTERNAL"/ "RESOURCELIMIT" /
+ "CONNRESET" / "TORPROTOCOL" / "NOTDIRECTORY" / "END"
+
+ The "REASON" field is provided only for FAILED, CLOSED, and DETACHED
+ events, and only if extended events are enabled (see 3.19). Clients MUST
+ accept reasons not listed above. Reasons are as given in tor-spec.txt,
+ except for:
+
+ END (We received a RELAY_END cell from the other side of this
+ stream.)
+ [XXXX document more. -NM]
+
+ The "REMOTE_REASON" field is provided only when we receive a RELAY_END
+ cell, and only if extended events are enabled. It contains the actual
+ reason given by the remote OR for closing the stream. Clients MUST accept
+ reasons not listed above. Reasons are as listed in tor-spec.txt.
+
+ "REMAP" events include a Source if extended events are enabled:
+ Source = "CACHE" / "EXIT"
+ Clients MUST accept sources not listed above. "CACHE" is given if
+ the Tor client decided to remap the address because of a cached
+ answer, and "EXIT" is given if the remote node we queried gave us
+ the new address as a response.
+
+ The "SOURCE_ADDR" field is included with NEW and NEWRESOLVE events if
+ extended events are enabled. It indicates the address and port
+ that requested the connection, and can be (e.g.) used to look up the
+ requesting program.
+
+ Purpose = "DIR_FETCH" / "UPLOAD_DESC" / "DNS_REQUEST" /
+ "USER" / "DIRPORT_TEST"
+
+ The "PURPOSE" field is provided only for NEW and NEWRESOLVE events, and
+ only if extended events are enabled (see 3.19). Clients MUST accept
+ purposes not listed above.
+
+4.1.3. OR Connection status changed
+
+ The syntax is:
+ "650" SP "ORCONN" SP (ServerID / Target) SP ORStatus [ SP "REASON="
+ Reason ] [ SP "NCIRCS=" NumCircuits ] CRLF
+
+ ORStatus = "NEW" / "LAUNCHED" / "CONNECTED" / "FAILED" / "CLOSED"
+
+ NEW is for incoming connections, and LAUNCHED is for outgoing
+ connections. CONNECTED means the TLS handshake has finished (in
+ either direction). FAILED means a connection is being closed that
+ hasn't finished its handshake, and CLOSED is for connections that
+ have handshaked.
+
+ A ServerID is specified unless it's a NEW connection, in which
+ case we don't know what server it is yet, so we use Address:Port.
+
+ If extended events are enabled (see 3.19), optional reason and
+ circuit counting information is provided for CLOSED and FAILED
+ events.
+
+ Reason = "MISC" / "DONE" / "CONNECTREFUSED" /
+ "IDENTITY" / "CONNECTRESET" / "TIMEOUT" / "NOROUTE" /
+ "IOERROR" / "RESOURCELIMIT"
+
+ NumCircuits counts both established and pending circuits.
+
+4.1.4. Bandwidth used in the last second
+
+ The syntax is:
+ "650" SP "BW" SP BytesRead SP BytesWritten *(SP Type "=" Num) CRLF
+ BytesRead = 1*DIGIT
+ BytesWritten = 1*DIGIT
+ Type = "DIR" / "OR" / "EXIT" / "APP" / ...
+ Num = 1*DIGIT
+
+ BytesRead and BytesWritten are the totals. [In a future Tor version,
+ we may also include a breakdown of the connection types that used
+ bandwidth this second (not implemented yet).]
+
+4.1.5. Log messages
+
+ The syntax is:
+ "650" SP Severity SP ReplyText CRLF
+ or
+ "650+" Severity CRLF Data "650" SP "OK" CRLF
+
+ Severity = "DEBUG" / "INFO" / "NOTICE" / "WARN"/ "ERR"
+
+4.1.6. New descriptors available
+
+ Syntax:
+ "650" SP "NEWDESC" 1*(SP ServerID) CRLF
+
+4.1.7. New Address mapping
+
+ Syntax:
+ "650" SP "ADDRMAP" SP Address SP NewAddress SP Expiry
+ [SP Error] SP GMTExpiry CRLF
+
+ NewAddress = Address / ""
+ Expiry = DQUOTE ISOTime DQUOTE / "NEVER"
+
+ Error = "error=" ErrorCode
+ ErrorCode = XXXX
+ GMTExpiry = "EXPIRES=" DQUOTE IsoTime DQUOTE
+
+ Error and GMTExpiry are only provided if extended events are enabled.
+
+ Expiry is expressed as the local time (rather than GMT). This is a bug,
+ left in for backward compatibility; new code should look at GMTExpiry
+ instead.
+
+ These events are generated when a new address mapping is entered in the
+ cache, or when the answer for a RESOLVE command is found.
+
+4.1.8. Descriptors uploaded to us in our role as authoritative dirserver
+
+ Syntax:
+ "650" "+" "AUTHDIR_NEWDESCS" CRLF Action CRLF Message CRLF
+ Descriptor CRLF "." CRLF "650" SP "OK" CRLF
+ Action = "ACCEPTED" / "DROPPED" / "REJECTED"
+ Message = Text
+
+4.1.9. Our descriptor changed
+
+ Syntax:
+ "650" SP "DESCCHANGED" CRLF
+
+ [First added in 0.1.2.2-alpha.]
+
+4.1.10. Status events
+
+ Status events (STATUS_GENERAL, STATUS_CLIENT, and STATUS_SERVER) are sent
+ based on occurrences in the Tor process pertaining to the general state of
+ the program. Generally, they correspond to log messages of severity Notice
+ or higher. They differ from log messages in that their format is a
+ specified interface.
+
+ Syntax:
+ "650" SP StatusType SP StatusSeverity SP StatusAction
+ [SP StatusArguments] CRLF
+
+ StatusType = "STATUS_GENERAL" / "STATUS_CLIENT" / "STATUS_SERVER"
+ StatusSeverity = "NOTICE" / "WARN" / "ERR"
+ StatusAction = 1*ALPHA
+ StatusArguments = StatusArgument *(SP StatusArgument)
+ StatusArgument = StatusKeyword '=' StatusValue
+ StatusKeyword = 1*(ALNUM / "_")
+ StatusValue = 1*(ALNUM / '_') / QuotedString
+
+ Action is a string, and Arguments is a series of keyword=value
+ pairs on the same line. Values may be space-terminated strings,
+ or quoted strings.
+
+ These events are always produced with EXTENDED_EVENTS and
+ VERBOSE_NAMES; see the explanations in the USEFEATURE section
+ for details.
+
+ Controllers MUST tolerate unrecognized actions, MUST tolerate
+ unrecognized arguments, MUST tolerate missing arguments, and MUST
+ tolerate arguments that arrive in any order.
+
+ Each event description below is accompanied by a recommendation for
+ controllers. These recommendations are suggestions only; no controller
+ is required to implement them.
+
+ Compatibility note: versions of Tor before 0.2.0.22-rc incorrectly
+ generated "STATUS_SERVER" as "STATUS_SEVER". To be compatible with those
+ versions, tools should accept both.
+
+ Actions for STATUS_GENERAL events can be as follows:
+
+ CLOCK_JUMPED
+ "TIME=NUM"
+ Tor spent enough time without CPU cycles that it has closed all
+ its circuits and will establish them anew. This typically
+ happens when a laptop goes to sleep and then wakes up again. It
+ also happens when the system is swapping so heavily that Tor is
+ starving. The "time" argument specifies the number of seconds Tor
+ thinks it was unconscious for (or alternatively, the number of
+ seconds it went back in time).
+
+ This status event is sent as NOTICE severity normally, but WARN
+ severity if Tor is acting as a server currently.
+
+ {Recommendation for controller: ignore it, since we don't really
+ know what the user should do anyway. Hm.}
+
+ DANGEROUS_VERSION
+ "CURRENT=version"
+ "REASON=NEW/OBSOLETE/UNRECOMMENDED"
+ "RECOMMENDED=\"version, version, ...\""
+ Tor has found that directory servers don't recommend its version of
+ the Tor software. RECOMMENDED is a comma-and-space-separated string
+ of Tor versions that are recommended. REASON is NEW if this version
+ of Tor is newer than any recommended version, OBSOLETE if
+ this version of Tor is older than any recommended version, and
+ UNRECOMMENDED if some recommended versions of Tor are newer and
+ some are older than this version. (The "OBSOLETE" reason was called
+ "OLD" from Tor 0.1.2.3-alpha up to and including 0.2.0.12-alpha.)
+
+ {Controllers may want to suggest that the user upgrade OBSOLETE or
+ UNRECOMMENDED versions. NEW versions may be known-insecure, or may
+ simply be development versions.}
+
+ TOO_MANY_CONNECTIONS
+ "CURRENT=NUM"
+ Tor has reached its ulimit -n or whatever the native limit is on file
+ descriptors or sockets. CURRENT is the number of sockets Tor
+ currently has open. The user should really do something about
+ this. The "current" argument shows the number of connections currently
+ open.
+
+ {Controllers may recommend that the user increase the limit, or
+ increase it for them. Recommendations should be phrased in an
+ OS-appropriate way and automated when possible.}
+
+ BUG
+ "REASON=STRING"
+ Tor has encountered a situation that its developers never expected,
+ and the developers would like to learn that it happened. Perhaps
+ the controller can explain this to the user and encourage her to
+ file a bug report?
+
+ {Controllers should log bugs, but shouldn't annoy the user in case a
+ bug appears frequently.}
+
+ CLOCK_SKEW
+ SKEW="+" / "-" SECONDS
+ MIN_SKEW="+" / "-" SECONDS.
+ SOURCE="DIRSERV:" IP ":" Port /
+ "NETWORKSTATUS:" IP ":" Port /
+ "OR:" IP ":" Port /
+ "CONSENSUS"
+ If "SKEW" is present, it's an estimate of how far we are from the
+ time declared in the source. (In other words, if we're an hour in
+ the past, the value is -3600.) If "MIN_SKEW" is present, it's a lower
+ bound. If the source is a DIRSERV, we got the current time from a
+ connection to a dirserver. If the source is a NETWORKSTATUS, we
+ decided we're skewed because we got a v2 networkstatus from far in
+ the future. If the source is OR, the skew comes from a NETINFO
+ cell from a connection to another relay. If the source is
+ CONSENSUS, we decided we're skewed because we got a networkstatus
+ consensus from the future.
+
+ {Tor should send this message to controllers when it thinks the
+ skew is so high that it will interfere with proper Tor operation.
+ Controllers shouldn't blindly adjust the clock, since the more
+ accurate source of skew info (DIRSERV) is currently
+ unauthenticated.}
+
+ BAD_LIBEVENT
+ "METHOD=" libevent method
+ "VERSION=" libevent version
+ "BADNESS=" "BROKEN" / "BUGGY" / "SLOW"
+ "RECOVERED=" "NO" / "YES"
+ Tor knows about bugs in using the configured event method in this
+ version of libevent. "BROKEN" libevents won't work at all;
+ "BUGGY" libevents might work okay; "SLOW" libevents will work
+ fine, but not quickly. If "RECOVERED" is YES, Tor managed to
+ switch to a more reliable (but probably slower!) libevent method.
+
+ {Controllers may want to warn the user if this event occurs, though
+ generally it's the fault of whoever built the Tor binary and there's
+ not much the user can do besides upgrade libevent or upgrade the
+ binary.}
+
+ DIR_ALL_UNREACHABLE
+ Tor believes that none of the known directory servers are
+ reachable -- this is most likely because the local network is
+ down or otherwise not working, and might help to explain for the
+ user why Tor appears to be broken.
+
+ {Controllers may want to warn the user if this event occurs; further
+ action is generally not possible.}
+
+ CONSENSUS_ARRIVED
+ Tor has received and validated a new consensus networkstatus.
+ (This event can be delayed a little while after the consensus
+ is received, if Tor needs to fetch certificates.)
+
+ Actions for STATUS_CLIENT events can be as follows:
+
+ BOOTSTRAP
+ "PROGRESS=" num
+ "TAG=" Keyword
+ "SUMMARY=" String
+ ["WARNING=" String
+ "REASON=" Keyword
+ "COUNT=" num
+ "RECOMMENDATION=" Keyword
+ ]
+
+ Tor has made some progress at establishing a connection to the
+ Tor network, fetching directory information, or making its first
+ circuit; or it has encountered a problem while bootstrapping. This
+ status event is especially useful for users with slow connections
+ or with connectivity problems.
+
+ "Progress" gives a number between 0 and 100 for how far through
+ the bootstrapping process we are. "Summary" is a string that can
+ be displayed to the user to describe the *next* task that Tor
+ will tackle, i.e., the task it is working on after sending the
+ status event. "Tag" is a string that controllers can use to
+ recognize bootstrap phases, if they want to do something smarter
+ than just blindly displaying the summary string; see Section 5
+ for the current tags that Tor issues.
+
+ The StatusSeverity describes whether this is a normal bootstrap
+ phase (severity notice) or an indication of a bootstrapping
+ problem (severity warn).
+
+ For bootstrap problems, we include the same progress, tag, and
+ summary values as we would for a normal bootstrap event, but we
+ also include "warning", "reason", "count", and "recommendation"
+ key/value combos. The "count" number tells how many bootstrap
+ problems there have been so far at this phase. The "reason"
+ string lists one of the reasons allowed in the ORCONN event. The
+ "warning" argument is a string with any hints Tor has to offer about
+ why it's having trouble bootstrapping.
+
+ The "reason" values are long-term-stable controller-facing tags to
+ identify particular issues in a bootstrapping step. The warning
+ strings, on the other hand, are human-readable. Controllers
+ SHOULD NOT rely on the format of any warning string. Currently
+ the possible values for "recommendation" are either "ignore" or
+ "warn" -- if ignore, the controller can accumulate the string in
+ a pile of problems to show the user if the user asks; if warn,
+ the controller should alert the user that Tor is pretty sure
+ there's a bootstrapping problem.
+
+ Currently Tor uses recommendation=ignore for the first
+ nine bootstrap problem reports for a given phase, and then
+ uses recommendation=warn for subsequent problems at that
+ phase. Hopefully this is a good balance between tolerating
+ occasional errors and reporting serious problems quickly.
+
+ ENOUGH_DIR_INFO
+ Tor now knows enough network-status documents and enough server
+ descriptors that it's going to start trying to build circuits now.
+
+ {Controllers may want to use this event to decide when to indicate
+ progress to their users, but should not interrupt the user's browsing
+ to tell them so.}
+
+ NOT_ENOUGH_DIR_INFO
+ We discarded expired statuses and router descriptors to fall
+ below the desired threshold of directory information. We won't
+ try to build any circuits until ENOUGH_DIR_INFO occurs again.
+
+ {Controllers may want to use this event to decide when to indicate
+ progress to their users, but should not interrupt the user's browsing
+ to tell them so.}
+
+ CIRCUIT_ESTABLISHED
+ Tor is able to establish circuits for client use. This event will
+ only be sent if we just built a circuit that changed our mind --
+ that is, prior to this event we didn't know whether we could
+ establish circuits.
+
+ {Suggested use: controllers can notify their users that Tor is
+ ready for use as a client once they see this status event. [Perhaps
+ controllers should also have a timeout if too much time passes and
+ this event hasn't arrived, to give tips on how to troubleshoot.
+ On the other hand, hopefully Tor will send further status events
+ if it can identify the problem.]}
+
+ CIRCUIT_NOT_ESTABLISHED
+ "REASON=" "EXTERNAL_ADDRESS" / "DIR_ALL_UNREACHABLE" / "CLOCK_JUMPED"
+ We are no longer confident that we can build circuits. The "reason"
+ keyword provides an explanation: which other status event type caused
+ our lack of confidence.
+
+ {Controllers may want to use this event to decide when to indicate
+ progress to their users, but should not interrupt the user's browsing
+ to do so.}
+ [Note: only REASON=CLOCK_JUMPED is implemented currently.]
+
+ DANGEROUS_PORT
+ "PORT=" port
+ "RESULT=" "REJECT" / "WARN"
+ A stream was initiated to a port that's commonly used for
+ vulnerable-plaintext protocols. If the Result is "reject", we
+ refused the connection; whereas if it's "warn", we allowed it.
+
+ {Controllers should warn their users when this occurs, unless they
+ happen to know that the application using Tor is in fact doing so
+ correctly (e.g., because it is part of a distributed bundle). They
+ might also want some sort of interface to let the user configure
+ their RejectPlaintextPorts and WarnPlaintextPorts config options.}
+
+ DANGEROUS_SOCKS
+ "PROTOCOL=" "SOCKS4" / "SOCKS5"
+ "ADDRESS=" IP:port
+ A connection was made to Tor's SOCKS port using one of the SOCKS
+ approaches that doesn't support hostnames -- only raw IP addresses.
+ If the client application got this address from gethostbyname(),
+ it may be leaking target addresses via DNS.
+
+ {Controllers should warn their users when this occurs, unless they
+ happen to know that the application using Tor is in fact doing so
+ correctly (e.g., because it is part of a distributed bundle).}
+
+ SOCKS_UNKNOWN_PROTOCOL
+ "DATA=string"
+ A connection was made to Tor's SOCKS port that tried to use it
+ for something other than the SOCKS protocol. Perhaps the user is
+ using Tor as an HTTP proxy? The DATA is the first few characters
+ sent to Tor on the SOCKS port.
+
+ {Controllers may want to warn their users when this occurs: it
+ indicates a misconfigured application.}
+
+ SOCKS_BAD_HOSTNAME
+ "HOSTNAME=QuotedString"
+ Some application gave us a funny-looking hostname. Perhaps
+ it is broken? In any case it won't work with Tor and the user
+ should know.
+
+ {Controllers may want to warn their users when this occurs: it
+ usually indicates a misconfigured application.}
+
+ Actions for STATUS_SERVER can be as follows:
+
+ EXTERNAL_ADDRESS
+ "ADDRESS=IP"
+ "HOSTNAME=NAME"
+ "METHOD=CONFIGURED/DIRSERV/RESOLVED/INTERFACE/GETHOSTNAME"
+ Our best idea for our externally visible IP has changed to 'IP'.
+ If 'HOSTNAME' is present, we got the new IP by resolving 'NAME'. If the
+ method is 'CONFIGURED', the IP was given verbatim as a configuration
+ option. If the method is 'RESOLVED', we resolved the Address
+ configuration option to get the IP. If the method is 'GETHOSTNAME',
+ we resolved our hostname to get the IP. If the method is 'INTERFACE',
+ we got the address of one of our network interfaces to get the IP. If
+ the method is 'DIRSERV', a directory server told us a guess for what
+ our IP might be.
+
+ {Controllers may want to record this info and display it to the user.}
+
+ CHECKING_REACHABILITY
+ "ORADDRESS=IP:port"
+ "DIRADDRESS=IP:port"
+ We're going to start testing the reachability of our external OR port
+ or directory port.
+
+ {This event could affect the controller's idea of server status, but
+ the controller should not interrupt the user to tell them so.}
+
+ REACHABILITY_SUCCEEDED
+ "ORADDRESS=IP:port"
+ "DIRADDRESS=IP:port"
+ We successfully verified the reachability of our external OR port or
+ directory port (depending on which of ORADDRESS or DIRADDRESS is
+ given.)
+
+ {This event could affect the controller's idea of server status, but
+ the controller should not interrupt the user to tell them so.}
+
+ GOOD_SERVER_DESCRIPTOR
+ We successfully uploaded our server descriptor to at least one
+ of the directory authorities, with no complaints.
+
+ {Originally, the goal of this event was to declare "every authority
+ has accepted the descriptor, so there will be no complaints
+ about it." But since some authorities might be offline, it's
+ harder to get certainty than we had thought. As such, this event
+ is equivalent to ACCEPTED_SERVER_DESCRIPTOR below. Controllers
+ should just look at ACCEPTED_SERVER_DESCRIPTOR and should ignore
+ this event for now.}
+
+ NAMESERVER_STATUS
+ "NS=addr"
+ "STATUS=" "UP" / "DOWN"
+ "ERR=" message
+ One of our nameservers has changed status.
+
+ {This event could affect the controller's idea of server status, but
+ the controller should not interrupt the user to tell them so.}
+
+ NAMESERVER_ALL_DOWN
+ All of our nameservers have gone down.
+
+ {This is a problem; if it happens often without the nameservers
+ coming up again, the user needs to configure more or better
+ nameservers.}
+
+ DNS_HIJACKED
+ Our DNS provider is providing an address when it should be saying
+ "NOTFOUND"; Tor will treat the address as a synonym for "NOTFOUND".
+
+ {This is an annoyance; controllers may want to tell admins that their
+ DNS provider is not to be trusted.}
+
+ DNS_USELESS
+ Our DNS provider is giving a hijacked address instead of well-known
+ websites; Tor will not try to be an exit node.
+
+ {Controllers could warn the admin if the server is running as an
+ exit server: the admin needs to configure a good DNS server.
+ Alternatively, this happens a lot in some restrictive environments
+ (hotels, universities, coffeeshops) when the user hasn't registered.}
+
+ BAD_SERVER_DESCRIPTOR
+ "DIRAUTH=addr:port"
+ "REASON=string"
+ A directory authority rejected our descriptor. Possible reasons
+ include malformed descriptors, incorrect keys, highly skewed clocks,
+ and so on.
+
+ {Controllers should warn the admin, and try to cope if they can.}
+
+ ACCEPTED_SERVER_DESCRIPTOR
+ "DIRAUTH=addr:port"
+ A single directory authority accepted our descriptor.
+ // actually notice
+
+ {This event could affect the controller's idea of server status, but
+ the controller should not interrupt the user to tell them so.}
+
+ REACHABILITY_FAILED
+ "ORADDRESS=IP:port"
+ "DIRADDRESS=IP:port"
+ We failed to connect to our external OR port or directory port
+ successfully.
+
+ {This event could affect the controller's idea of server status. The
+ controller should warn the admin and suggest reasonable steps to take.}
+
+4.1.11. Our set of guard nodes has changed
+
+ Syntax:
+ "650" SP "GUARD" SP Type SP Name SP Status ... CRLF
+ Type = "ENTRY"
+ Name = The (possibly verbose) nickname of the guard affected.
+ Status = "NEW" | "UP" | "DOWN" | "BAD" | "GOOD" | "DROPPED"
+
+ [explain states. XXX]
+
+4.1.12. Network status has changed
+
+ Syntax:
+ "650" "+" "NS" CRLF 1*NetworkStatus "." CRLF "650" SP "OK" CRLF
+
+ The event is used whenever our local view of a relay status changes.
+ This happens when we get a new v3 consensus (in which case the entries
+ we see are a duplicate of what we see in the NEWCONSENSUS event,
+ below), but it also happens when we decide to mark a relay as up or
+ down in our local status, for example based on connection attempts.
+
+ [First added in 0.1.2.3-alpha]
+
+4.1.13. Bandwidth used on an application stream
+
+ The syntax is:
+ "650" SP "STREAM_BW" SP StreamID SP BytesRead SP BytesWritten CRLF
+ BytesRead = 1*DIGIT
+ BytesWritten = 1*DIGIT
+
+ BytesRead and BytesWritten are the number of bytes read and written since
+ the last STREAM_BW event on this stream. These events are generated about
+ once per second per stream; no events are generated for streams that have
+ not read or written.
+
+ These events apply only to streams entering Tor (such as on a SOCKSPort,
+ TransPort, or so on). They are not generated for exiting streams.
+
+4.1.14. Per-country client stats
+
+ The syntax is:
+ "650" SP "CLIENTS_SEEN" SP TimeStarted SP CountrySummary CRLF
+
+ We just generated a new summary of which countries we've seen clients
+ from recently. The controller could display this for the user, e.g.
+ in their "relay" configuration window, to give them a sense that they
+ are actually being useful.
+
+ Currently only bridge relays will receive this event, but once we figure
+ out how to sufficiently aggregate and sanitize the client counts on
+ main relays, we might start sending these events in other cases too.
+
+ TimeStarted is a quoted string indicating the time (in GMT) from which
+ the reported summary counts.
+
+ The CountrySummary keyword has as its argument a comma-separated
+ set of "countrycode=count" pairs. For example,
+ 650-CLIENTS_SEEN TimeStarted="Thu Dec 25 23:50:43 EST 2008"
+ 650 CountrySummary=us=16,de=8,uk=8
+[XXX Matt Edman informs me that the time format above is wrong. -RD]
+
+4.1.15. New consensus networkstatus has arrived.
+
+ The syntax is:
+ "650" "+" "NEWCONSENSUS" CRLF 1*NetworkStatus "." CRLF "650" SP
+ "OK" CRLF
+
+ A new consensus networkstatus has arrived. We include NS-style lines for
+ every relay in the consensus. NEWCONSENSUS is a separate event from the
+ NS event, because the list here represents every usable relay: so any
+ relay *not* mentioned in this list is implicitly no longer recommended.
+
+ [First added in 0.2.1.13-alpha]
+
+5. Implementation notes
+
+5.1. Authentication
+
+ If the control port is open and no authentication operation is enabled, Tor
+ trusts any local user that connects to the control port. This is generally
+ a poor idea.
+
+ If the 'CookieAuthentication' option is true, Tor writes a "magic cookie"
+ file named "control_auth_cookie" into its data directory. To authenticate,
+ the controller must send the contents of this file, encoded in hexadecimal.
+
+ If the 'HashedControlPassword' option is set, it must contain the salted
+ hash of a secret password. The salted hash is computed according to the
+ S2K algorithm in RFC 2440 (OpenPGP), and prefixed with the s2k specifier.
+ This is then encoded in hexadecimal, prefixed by the indicator sequence
+ "16:". Thus, for example, the password 'foo' could encode to:
+ 16:660537E3E1CD49996044A3BF558097A981F539FEA2F9DA662B4626C1C2
+    ++++++++++++++++**^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+          salt                       hashed value
+                    indicator
+ You can generate the salted hash of a password by calling
+ 'tor --hash-password <password>'
+ or by using the example code in the Python and Java controller libraries.
+ To authenticate under this scheme, the controller sends Tor the original
+ secret that was used to generate the password, either as a quoted string
+ or encoded in hexadecimal.
+
+5.2. Don't let the buffer get too big.
+
+ If you ask for lots of events, and 16MB of them queue up on the buffer,
+ the Tor process will close the socket.
+
+5.3. Backward compatibility with v0 control protocol.
+
+ The 'version 0' control protocol was replaced in Tor 0.1.1.x. Support
+ was removed in Tor 0.2.0.x. Every non-obsolete version of Tor now
+ supports the version 1 control protocol.
+
+ For backward compatibility with the "version 0" control protocol,
+ Tor used to check whether the third octet of the first command is zero.
+ (If it was, Tor assumed that version 0 is in use.)
+
+ This compatibility was removed in Tor 0.1.2.16 and 0.2.0.4-alpha.
+
+5.4. Tor config options for use by controllers
+
+ Tor provides a few special configuration options for use by controllers.
+ These options can be set and examined by the SETCONF and GETCONF commands,
+ but are not saved to disk by SAVECONF.
+
+ Generally, these options make Tor unusable by disabling a portion of Tor's
+ normal operations. Unless a controller provides replacement functionality
+ to fill this gap, Tor will not correctly handle user requests.
+
+ __AllDirOptionsPrivate
+
+ If true, Tor will try to launch all directory operations through
+ anonymous connections. (Ordinarily, Tor only tries to anonymize
+ requests related to hidden services.) This option will slow down
+ directory access, and may stop Tor from working entirely if it does not
+ yet have enough directory information to build circuits.
+
+ (Boolean. Default: "0".)
+
+ __DisablePredictedCircuits
+
+ If true, Tor will not launch preemptive "general-purpose" circuits for
+ streams to attach to. (It will still launch circuits for testing and
+ for hidden services.)
+
+ (Boolean. Default: "0".)
+
+ __LeaveStreamsUnattached
+
+ If true, Tor will not automatically attach new streams to circuits;
+ instead, the controller must attach them with ATTACHSTREAM. If the
+ controller does not attach the streams, their data will never be routed.
+
+ (Boolean. Default: "0".)
+
+ __HashedControlSessionPassword
+
+ As HashedControlPassword, but is not saved to the torrc file by
+ SAVECONF. Added in Tor 0.2.0.20-rc.
+
+ __ReloadTorrcOnSIGHUP
+
+ If this option is true (the default), we reload the torrc from disk
+ every time we get a SIGHUP (from the controller or via a signal).
+ Otherwise, we don't. This option exists so that controllers can keep
+ their options from getting overwritten when a user sends Tor a HUP for
+ some other reason (for example, to rotate the logs).
+
+ (Boolean. Default: "1")
+
+5.5. Phases from the Bootstrap status event.
+
+ This section describes the various bootstrap phases currently reported
+ by Tor. Controllers should not assume that the percentages and tags
+ listed here will continue to match up, or even that the tags will stay
+ in the same order. Some phases might also be skipped (not reported)
+ if the associated bootstrap step is already complete, or if the phase
+ no longer is necessary. Only "starting" and "done" are guaranteed to
+ exist in all future versions.
+
+ Current Tor versions enter these phases in order, monotonically.
+ Future Tors MAY revisit earlier stages.
+
+ Phase 0:
+ tag=starting summary="Starting"
+
+ Tor starts out in this phase.
+
+ Phase 5:
+ tag=conn_dir summary="Connecting to directory mirror"
+
+ Tor sends this event as soon as Tor has chosen a directory mirror --
+ e.g. one of the authorities if bootstrapping for the first time or
+ after a long downtime, or one of the relays listed in its cached
+ directory information otherwise.
+
+ Tor will stay at this phase until it has successfully established
+ a TCP connection with some directory mirror. Problems in this phase
+ generally happen because Tor doesn't have a network connection, or
+ because the local firewall is dropping SYN packets.
+
+ Phase 10:
+ tag=handshake_dir summary="Finishing handshake with directory mirror"
+
+ This event occurs when Tor establishes a TCP connection with a relay used
+ as a directory mirror (or its https proxy if it's using one). Tor remains
+ in this phase until the TLS handshake with the relay is finished.
+
+ Problems in this phase generally happen because Tor's firewall is
+ doing more sophisticated MITM attacks on it, or doing packet-level
+ keyword recognition of Tor's handshake.
+
+ Phase 15:
+ tag=onehop_create summary="Establishing one-hop circuit for dir info"
+
+ Once TLS is finished with a relay, Tor will send a CREATE_FAST cell
+ to establish a one-hop circuit for retrieving directory information.
+ It will remain in this phase until it receives the CREATED_FAST cell
+ back, indicating that the circuit is ready.
+
+ Phase 20:
+ tag=requesting_status summary="Asking for networkstatus consensus"
+
+ Once we've finished our one-hop circuit, we will start a new stream
+ for fetching the networkstatus consensus. We'll stay in this phase
+ until we get the 'connected' relay cell back, indicating that we've
+ established a directory connection.
+
+ Phase 25:
+ tag=loading_status summary="Loading networkstatus consensus"
+
+ Once we've established a directory connection, we will start fetching
+ the networkstatus consensus document. This could take a while; this
+ phase is a good opportunity for using the "progress" keyword to indicate
+ partial progress.
+
+ This phase could stall if the directory mirror we picked doesn't
+ have a copy of the networkstatus consensus so we have to ask another,
+ or it does give us a copy but we don't find it valid.
+
+ Phase 40:
+ tag=loading_keys summary="Loading authority key certs"
+
+ Sometimes when we've finished loading the networkstatus consensus,
+ we find that we don't have all the authority key certificates for the
+ keys that signed the consensus. At that point we put the consensus we
+ fetched on hold and fetch the keys so we can verify the signatures.
+
+ Phase 45:
+ tag=requesting_descriptors summary="Asking for relay descriptors"
+
+ Once we have a valid networkstatus consensus and we've checked all
+ its signatures, we start asking for relay descriptors. We stay in this
+ phase until we have received a 'connected' relay cell in response to
+ a request for descriptors.
+
+ Phase 50:
+ tag=loading_descriptors summary="Loading relay descriptors"
+
+ We will ask for relay descriptors from several different locations,
+ so this step will probably make up the bulk of the bootstrapping,
+ especially for users with slow connections. We stay in this phase until
+ we have descriptors for at least 1/4 of the usable relays listed in
+ the networkstatus consensus. This phase is also a good opportunity to
+ use the "progress" keyword to indicate partial steps.
+
+ Phase 80:
+ tag=conn_or summary="Connecting to entry guard"
+
+ Once we have a valid consensus and enough relay descriptors, we choose
+ some entry guards and start trying to build some circuits. This step
+ is similar to the "conn_dir" phase above; the only difference is
+ the context.
+
+ If a Tor starts with enough recent cached directory information,
+ its first bootstrap status event will be for the conn_or phase.
+
+ Phase 85:
+ tag=handshake_or summary="Finishing handshake with entry guard"
+
+ This phase is similar to the "handshake_dir" phase, but it gets reached
+ if we finish a TCP connection to a Tor relay and we have already reached
+ the "conn_or" phase. We'll stay in this phase until we complete a TLS
+ handshake with a Tor relay.
+
+ Phase 90:
+ tag=circuit_create summary="Establishing circuits"
+
+ Once we've finished our TLS handshake with an entry guard, we will
+ set about trying to make some 3-hop circuits in case we need them soon.
+
+ Phase 100:
+ tag=done summary="Done"
+
+ A full 3-hop exit circuit has been established. Tor is ready to handle
+ application connections now.
+
diff --git a/orchid/doc/spec/dir-spec.txt b/orchid/doc/spec/dir-spec.txt
new file mode 100644
index 00000000..faa3a660
--- /dev/null
+++ b/orchid/doc/spec/dir-spec.txt
@@ -0,0 +1,2132 @@
+
+ Tor directory protocol, version 3
+
+0. Scope and preliminaries
+
+ This directory protocol is used by Tor version 0.2.0.x-alpha and later.
+ See dir-spec-v1.txt for information on the protocol used up to the
+ 0.1.0.x series, and dir-spec-v2.txt for information on the protocol
+ used by the 0.1.1.x and 0.1.2.x series.
+
+ Caches and authorities must still support older versions of the
+ directory protocols, until the versions of Tor that require them are
+ finally out of commission. See Section XXXX on backward compatibility.
+
+ This document merges and supersedes the following proposals:
+
+ 101 Voting on the Tor Directory System
+ 103 Splitting identity key from regularly used signing key
+ 104 Long and Short Router Descriptors
+
+ AS OF 14 JUNE 2007, THIS SPECIFICATION HAS NOT YET BEEN COMPLETELY
+ IMPLEMENTED, OR COMPLETELY COMPLETED.
+
+ XXX when to download certificates.
+ XXX timeline
+ XXX fill in XXXXs
+
+0.1. History
+
+ The earliest versions of Onion Routing shipped with a list of known
+ routers and their keys. When the set of routers changed, users needed to
+ fetch a new list.
+
+ The Version 1 Directory protocol
+ --------------------------------
+
+ Early versions of Tor (0.0.2) introduced "Directory authorities": servers
+ that served signed "directory" documents containing a list of signed
+ "router descriptors", along with a short summary of the status of each
+ router. Thus, clients could get up-to-date information on the state of
+ the network automatically, and be certain that the list they were getting
+ was attested by a trusted directory authority.
+
+ Later versions (0.0.8) added directory caches, which download
+ directories from the authorities and serve them to clients. Non-caches
+ fetch from the caches in preference to fetching from the authorities, thus
+ distributing bandwidth requirements.
+
+ Also added during the version 1 directory protocol were "router status"
+ documents: short documents that listed only the up/down status of the
+ routers on the network, rather than a complete list of all the
+ descriptors. Clients and caches would fetch these documents far more
+ frequently than they would fetch full directories.
+
+ The Version 2 Directory Protocol
+ --------------------------------
+
+ During the Tor 0.1.1.x series, Tor revised its handling of directory
+ documents in order to address two major problems:
+
+ * Directories had grown quite large (over 1MB), and most directory
+ downloads consisted mainly of router descriptors that clients
+ already had.
+
+ * Every directory authority was a trust bottleneck: if a single
+ directory authority lied, it could make clients believe for a time
+ an arbitrarily distorted view of the Tor network. (Clients
+ trusted the most recent signed document they downloaded.) Thus,
+ adding more authorities would make the system less secure, not
+ more.
+
+ To address these, we extended the directory protocol so that
+ authorities now published signed "network status" documents. Each
+ network status listed, for every router in the network: a hash of its
+ identity key, a hash of its most recent descriptor, and a summary of
+ what the authority believed about its status. Clients would download
+ the authorities' network status documents in turn, and believe
+ statements about routers iff they were attested to by more than half of
+ the authorities.
+
+ Instead of downloading all router descriptors at once, clients
+ downloaded only the descriptors that they did not have. Descriptors
+ were indexed by their digests, in order to prevent malicious caches
+ from giving different versions of a router descriptor to different
+ clients.
+
+ Routers began working harder to upload new descriptors only when their
+ contents were substantially changed.
+
+
+0.2. Goals of the version 3 protocol
+
+ Version 3 of the Tor directory protocol tries to solve the following
+ issues:
+
+ * A great deal of bandwidth used to transmit router descriptors was
+ used by two fields that are not actually used by Tor routers
+ (namely read-history and write-history). We save about 60% by
+ moving them into a separate document that most clients do not
+ fetch or use.
+
+ * It was possible under certain perverse circumstances for clients
+ to download an unusual set of network status documents, thus
+ partitioning themselves from clients who have a more recent and/or
+ typical set of documents. Even under the best of circumstances,
+ clients were sensitive to the ages of the network status documents
+ they downloaded. Therefore, instead of having the clients
+ correlate multiple network status documents, we have the
+ authorities collectively vote on a single consensus network status
+ document.
+
+ * The most sensitive data in the entire network (the identity keys
+ of the directory authorities) needed to be stored unencrypted so
+ that the authorities can sign network-status documents on the fly.
+ Now, the authorities' identity keys are stored offline, and used
+ to certify medium-term signing keys that can be rotated.
+
+0.3. Some Remaining questions
+
+ Things we could solve on a v3 timeframe:
+
+ The SHA-1 hash is showing its age. We should do something about our
+ dependency on it. We could probably future-proof ourselves here in
+ this revision, at least so far as documents from the authorities are
+ concerned.
+
+ Too many things about the authorities are hardcoded by IP.
+
+ Perhaps we should start accepting longer identity keys for routers
+ too.
+
+ Things to solve eventually:
+
+ Requiring every client to know about every router won't scale forever.
+
+ Requiring every directory cache to know every router won't scale
+ forever.
+
+
+1. Outline
+
+ There is a small set (say, around 5-10) of semi-trusted directory
+ authorities. A default list of authorities is shipped with the Tor
+ software. Users can change this list, but are encouraged not to do so,
+ in order to avoid partitioning attacks.
+
+ Every authority has a very-secret, long-term "Authority Identity Key".
+ This is stored encrypted and/or offline, and is used to sign "key
+ certificate" documents. Every key certificate contains a medium-term
+ (3-12 months) "authority signing key", that is used by the authority to
+ sign other directory information. (Note that the authority identity
+ key is distinct from the router identity key that the authority uses
+ in its role as an ordinary router.)
+
+ Routers periodically upload signed "router descriptors" to the
+ directory authorities describing their keys, capabilities, and other
+ information. Routers may also upload signed "extra info documents"
+ containing information that is not required for the Tor protocol.
+ Directory authorities serve router descriptors indexed by router
+ identity, or by hash of the descriptor.
+
+ Routers may act as directory caches to reduce load on the directory
+ authorities. They announce this in their descriptors.
+
+ Periodically, each directory authority generates a view of
+ the current descriptors and status for known routers. They send a
+ signed summary of this view (a "status vote") to the other
+ authorities. The authorities compute the result of this vote, and sign
+ a "consensus status" document containing the result of the vote.
+
+ Directory caches download, cache, and re-serve consensus documents.
+
+ Clients, directory caches, and directory authorities all use consensus
+ documents to find out when their list of routers is out-of-date.
+ (Directory authorities also use vote statuses.) If it is, they download
+ any missing router descriptors. Clients download missing descriptors
+ from caches; caches and authorities download from authorities.
+ Descriptors are downloaded by the hash of the descriptor, not by the
+ server's identity key: this prevents servers from attacking clients by
+ giving them descriptors nobody else uses.
+
+ All directory information is uploaded and downloaded with HTTP.
+
+ [Authorities also generate and caches also cache documents produced and
+ used by earlier versions of this protocol; see section XXX for notes.]
+
+1.1. What's different from version 2?
+
+ Clients used to download multiple network status documents,
+ corresponding roughly to "status votes" above. They would compute the
+ result of the vote on the client side.
+
+ Authorities used to sign documents using the same private keys they used
+ for their roles as routers. This forced them to keep these extremely
+ sensitive keys in memory unencrypted.
+
+ All of the information in extra-info documents used to be kept in the
+ main descriptors.
+
+1.2. Document meta-format
+
+ Router descriptors, directories, and running-routers documents all obey the
+ following lightweight extensible information format.
+
+ The highest level object is a Document, which consists of one or more
+ Items. Every Item begins with a KeywordLine, followed by zero or more
+ Objects. A KeywordLine begins with a Keyword, optionally followed by
+ whitespace and more non-newline characters, and ends with a newline. A
+ Keyword is a sequence of one or more characters in the set [A-Za-z0-9-].
+ An Object is a block of encoded data in pseudo-Open-PGP-style
+ armor. (cf. RFC 2440)
+
+ More formally:
+
+ NL = The ascii LF character (hex value 0x0a).
+ Document ::= (Item | NL)+
+ Item ::= KeywordLine Object*
+ KeywordLine ::= Keyword NL | Keyword WS ArgumentChar+ NL
+ Keyword = KeywordChar+
+ KeywordChar ::= 'A' ... 'Z' | 'a' ... 'z' | '0' ... '9' | '-'
+ ArgumentChar ::= any printing ASCII character except NL.
+ WS = (SP | TAB)+
+ Object ::= BeginLine Base-64-encoded-data EndLine
+ BeginLine ::= "-----BEGIN " Keyword "-----" NL
+ EndLine ::= "-----END " Keyword "-----" NL
+
+ The BeginLine and EndLine of an Object must use the same keyword.
+
+ When interpreting a Document, software MUST ignore any KeywordLine that
+ starts with a keyword it doesn't recognize; future implementations MUST NOT
+ require current clients to understand any KeywordLine not currently
+ described.
+
+ The "opt" keyword was used until Tor 0.1.2.5-alpha for non-critical future
+ extensions. All implementations MUST ignore any item of the form "opt
+ keyword ....." when they would not recognize "keyword ....."; and MUST
+ treat "opt keyword ....." as synonymous with "keyword ......" when keyword
+ is recognized.
+
+ Implementations before 0.1.2.5-alpha rejected any document with a
+ KeywordLine that started with a keyword that they didn't recognize.
+ When generating documents that need to be read by older versions of Tor,
+ implementations MUST prefix items not recognized by older versions of
+ Tor with an "opt" until those versions of Tor are obsolete. [Note that
+ key certificates, status vote documents, extra info documents, and
+ status consensus documents will never be read by older versions of Tor.]
+
+ Other implementations that want to extend Tor's directory format MAY
+ introduce their own items. The keywords for extension items SHOULD start
+ with the characters "x-" or "X-", to guarantee that they will not conflict
+ with keywords used by future versions of Tor.
+
+ In our document descriptions below, we tag Items with a multiplicity in
+ brackets. Possible tags are:
+
+ "At start, exactly once": These items MUST occur in every instance of
+ the document type, and MUST appear exactly once, and MUST be the
+ first item in their documents.
+
+ "Exactly once": These items MUST occur exactly one time in every
+ instance of the document type.
+
+ "At end, exactly once": These items MUST occur in every instance of
+ the document type, and MUST appear exactly once, and MUST be the
+ last item in their documents.
+
+ "At most once": These items MAY occur zero or one times in any
+ instance of the document type, but MUST NOT occur more than once.
+
+ "Any number": These items MAY occur zero, one, or more times in any
+ instance of the document type.
+
+ "Once or more": These items MUST occur at least once in any instance
+ of the document type, and MAY occur more.
+
+1.3. Signing documents
+
+ Every signable document below is signed in a similar manner, using a
+ given "Initial Item", a final "Signature Item", a digest algorithm, and
+ a signing key.
+
+ The Initial Item must be the first item in the document.
+
+ The Signature Item has the following format:
+
+ <signature item keyword> [arguments] NL SIGNATURE NL
+
+ The "SIGNATURE" Object contains a signature (using the signing key) of
+ the PKCS1-padded digest of the entire document, taken from the
+ beginning of the Initial item, through the newline after the Signature
+ Item's keyword and its arguments.
+
+ Unless specified otherwise, the digest algorithm is SHA-1.
+
+ All documents are invalid unless signed with the correct signing key.
+
+ The "Digest" of a document, unless stated otherwise, is its digest *as
+ signed by this signature scheme*.
+
+1.4. Voting timeline
+
+ Every consensus document has a "valid-after" (VA) time, a "fresh-until"
+ (FU) time and a "valid-until" (VU) time. VA MUST precede FU, which MUST
+ in turn precede VU. Times are chosen so that every consensus will be
+ "fresh" until the next consensus becomes valid, and "valid" for a while
+ after. At least 3 consensuses should be valid at any given time.
+
+ The timeline for a given consensus is as follows:
+
+ VA-DistSeconds-VoteSeconds: The authorities exchange votes.
+
+ VA-DistSeconds-VoteSeconds/2: The authorities try to download any
+ votes they don't have.
+
+ VA-DistSeconds: The authorities calculate the consensus and exchange
+ signatures.
+
+ VA-DistSeconds/2: The authorities try to download any signatures
+ they don't have.
+
+ VA: All authorities have a multiply signed consensus.
+
+ VA ... FU: Caches download the consensus. (Note that since caches have
+ no way of telling what VA and FU are until they have downloaded
+ the consensus, they assume that the present consensus's VA is
+ equal to the previous one's FU, and that its FU is one interval after
+ that.)
+
+ FU: The consensus is no longer the freshest consensus.
+
+ FU ... (the current consensus's VU): Clients download the consensus.
+ (See note above: clients guess that the next consensus's FU will be
+ two intervals after the current VA.)
+
+ VU: The consensus is no longer valid.
+
+ VoteSeconds and DistSeconds MUST each be at least 20 seconds; FU-VA and
+ VU-FU MUST each be at least 5 minutes.
+
+2. Router operation and formats
+
+ ORs SHOULD generate a new router descriptor and a new extra-info
+ document whenever any of the following events have occurred:
+
+ - A period of time (18 hrs by default) has passed since the last
+ time a descriptor was generated.
+
+ - A descriptor field other than bandwidth or uptime has changed.
+
+ - Bandwidth has changed by a factor of 2 from the last time a
+ descriptor was generated, and at least a given interval of time
+ (20 mins by default) has passed since then.
+
+ - Its uptime has been reset (by restarting).
+
+ [XXX this list is incomplete; see router_differences_are_cosmetic()
+ in routerlist.c for others]
+
+ ORs SHOULD NOT publish a new router descriptor or extra-info document
+ if none of the above events have occurred and not much time has passed
+ (12 hours by default).
+
+ After generating a descriptor, ORs upload them to every directory
+ authority they know, by posting them (in order) to the URL
+
+ http://<hostname:port>/tor/
+
+2.1. Router descriptor format
+
+ Router descriptors consist of the following items. For backward
+ compatibility, there should be an extra NL at the end of each router
+ descriptor.
+
+ In lines that take multiple arguments, extra arguments SHOULD be
+ accepted and ignored. Many of the nonterminals below are defined in
+ section 2.3.
+
+ "router" nickname address ORPort SOCKSPort DirPort NL
+
+ [At start, exactly once.]
+
+ Indicates the beginning of a router descriptor. "nickname" must be a
+ valid router nickname as specified in 2.3. "address" must be an IPv4
+ address in dotted-quad format. The last three numbers indicate the
+ TCP ports at which this OR exposes functionality. ORPort is a port at
+ which this OR accepts TLS connections for the main OR protocol;
+ SOCKSPort is deprecated and should always be 0; and DirPort is the
+ port at which this OR accepts directory-related HTTP connections. If
+ any port is not supported, the value 0 is given instead of a port
+ number. (At least one of DirPort and ORPort SHOULD be set;
+ authorities MAY reject any descriptor with both DirPort and ORPort of
+ 0.)
+
+ "bandwidth" bandwidth-avg bandwidth-burst bandwidth-observed NL
+
+ [Exactly once]
+
+ Estimated bandwidth for this router, in bytes per second. The
+ "average" bandwidth is the volume per second that the OR is willing to
+ sustain over long periods; the "burst" bandwidth is the volume that
+ the OR is willing to sustain in very short intervals. The "observed"
+ value is an estimate of the capacity this server can handle. The
+ server remembers the maximum output bandwidth it sustained over any
+ ten-second period in the past day, and likewise the maximum sustained
+ input bandwidth. The "observed" value is the lesser of these two numbers.
+
+ "platform" string NL
+
+ [At most once]
+
+ A human-readable string describing the system on which this OR is
+ running. This MAY include the operating system, and SHOULD include
+ the name and version of the software implementing the Tor protocol.
+
+ "published" YYYY-MM-DD HH:MM:SS NL
+
+ [Exactly once]
+
+ The time, in GMT, when this descriptor (and its corresponding
+ extra-info document if any) was generated.
+
+ "fingerprint" fingerprint NL
+
+ [At most once]
+
+ A fingerprint (a HASH_LEN-byte hash of the asn1 encoded public key,
+ encoded in hex, with a single space after every 4 characters) for this
+ router's identity key. A descriptor is considered invalid (and MUST be
+ rejected) if the fingerprint line does not match the public key.
+
+ [We didn't start parsing this line until Tor 0.1.0.6-rc; it should
+ be marked with "opt" until earlier versions of Tor are obsolete.]
+
+ "hibernating" bool NL
+
+ [At most once]
+
+ If the value is 1, then the Tor server was hibernating when the
+ descriptor was published, and shouldn't be used to build circuits.
+
+ [We didn't start parsing this line until Tor 0.1.0.6-rc; it should be
+ marked with "opt" until earlier versions of Tor are obsolete.]
+
+ "uptime" number NL
+
+ [At most once]
+
+ The number of seconds that this OR process has been running.
+
+ "onion-key" NL a public key in PEM format
+
+ [Exactly once]
+
+ This key is used to encrypt EXTEND cells for this OR. The key MUST be
+ accepted for at least 1 week after any new key is published in a
+ subsequent descriptor. It MUST be 1024 bits.
+
+ "signing-key" NL a public key in PEM format
+
+ [Exactly once]
+
+ The OR's long-term identity key. It MUST be 1024 bits.
+
+ "accept" exitpattern NL
+ "reject" exitpattern NL
+
+ [Any number]
+
+ These lines describe an "exit policy": the rules that an OR follows
+ when deciding whether to allow a new stream to a given address. The
+ 'exitpattern' syntax is described below. There MUST be at least one
+ such entry. The rules are considered in order; if no rule matches,
+ the address will be accepted. For clarity, the last such entry SHOULD
+ be accept *:* or reject *:*.
+
+ "router-signature" NL Signature NL
+
+ [At end, exactly once]
+
+ The "SIGNATURE" object contains a signature of the PKCS1-padded
+ hash of the entire router descriptor, taken from the beginning of the
+ "router" line, through the newline after the "router-signature" line.
+ The router descriptor is invalid unless the signature is performed
+ with the router's identity key.
+
+ "contact" info NL
+
+ [At most once]
+
+ Describes a way to contact the server's administrator, preferably
+ including an email address and a PGP key fingerprint.
+
+ "family" names NL
+
+ [At most once]
+
+ 'Names' is a space-separated list of server nicknames or
+ hexdigests. If two ORs list one another in their "family" entries,
+ then OPs should treat them as a single OR for the purpose of path
+ selection.
+
+ For example, if node A's descriptor contains "family B", and node B's
+ descriptor contains "family A", then node A and node B should never
+ be used on the same circuit.
+
+ "read-history" YYYY-MM-DD HH:MM:SS (NSEC s) NUM,NUM,NUM,NUM,NUM... NL
+ [At most once]
+ "write-history" YYYY-MM-DD HH:MM:SS (NSEC s) NUM,NUM,NUM,NUM,NUM... NL
+ [At most once]
+
+ Declare how much bandwidth the OR has used recently. Usage is divided
+ into intervals of NSEC seconds. The YYYY-MM-DD HH:MM:SS field
+ defines the end of the most recent interval. The numbers are the
+ number of bytes used in the most recent intervals, ordered from
+ oldest to newest.
+
+ [We didn't start parsing these lines until Tor 0.1.0.6-rc; they should
+ be marked with "opt" until earlier versions of Tor are obsolete.]
+
+ [See also migration notes in section 2.2.1.]
+
+ "eventdns" bool NL
+
+ [At most once]
+
+ Declare whether this version of Tor is using the newer enhanced
+ dns logic. Versions of Tor with this field set to false SHOULD NOT
+ be used for reverse hostname lookups.
+
+ [All versions of Tor before 0.1.2.2-alpha should be assumed to have
+ this option set to 0 if it is not present. All Tor versions at
+ 0.1.2.2-alpha or later should be assumed to have this option set to
+ 1 if it is not present. Until 0.1.2.1-alpha-dev, this option was
+ not generated, even when the new DNS code was in use. Versions of Tor
+ before 0.1.2.1-alpha-dev did not parse this option, so it should be
+ marked "opt". The dnsworker logic has been removed, so this option
+ should not be used by new server code. However, it can still be
+ used, and should still be recognized by new code until Tor 0.1.2.x
+ is obsolete.]
+
+ "caches-extra-info" NL
+
+ [At most once.]
+
+ Present only if this router is a directory cache that provides
+ extra-info documents.
+
+ [Versions before 0.2.0.1-alpha don't recognize this, and versions
+ before 0.1.2.5-alpha will reject descriptors containing it unless
+ it is prefixed with "opt"; it should be so prefixed until these
+ versions are obsolete.]
+
+ "extra-info-digest" digest NL
+
+ [At most once]
+
+ "Digest" is a hex-encoded digest (using upper-case characters) of the
+ router's extra-info document, as signed in the router's extra-info
+ (that is, not including the signature). (If this field is absent, the
+ router is not uploading a corresponding extra-info document.)
+
+ [Versions before 0.2.0.1-alpha don't recognize this, and versions
+ before 0.1.2.5-alpha will reject descriptors containing it unless
+ it is prefixed with "opt"; it should be so prefixed until these
+ versions are obsolete.]
+
+ "hidden-service-dir" *(SP VersionNum) NL
+
+ [At most once.]
+
+ Present only if this router stores and serves hidden service
+ descriptors. If any VersionNum(s) are specified, this router
+ supports those descriptor versions. If none are specified, it
+ defaults to version 2 descriptors.
+
+ [Versions of Tor before 0.1.2.5-alpha rejected router descriptors
+ with unrecognized items; the hidden-service-dir line should be preceded
+ with an "opt" until these Tors are obsolete.]
+
+ "protocols" SP "Link" SP LINK-VERSION-LIST SP "Circuit" SP
+ CIRCUIT-VERSION-LIST NL
+
+ [At most once.]
+
+ Both lists are space-separated sequences of numbers, to indicate which
+ protocols the server supports. As of 30 Mar 2008, specified
+ protocols are "Link 1 2 Circuit 1". See section 4.1 of tor-spec.txt
+ for more information about link protocol versions.
+
+ [Versions of Tor before 0.1.2.5-alpha rejected router descriptors
+ with unrecognized items; the protocols line should be preceded with
+ an "opt" until these Tors are obsolete.]
+
+ "allow-single-hop-exits"
+
+ [At most once.]
+
+ Present only if the router allows single-hop circuits to make exit
+ connections. Most Tor servers do not support this: this is
+ included for specialized controllers designed to support perspective
+ access and such.
+
+
+2.2. Extra-info documents
+
+ Extra-info documents consist of the following items:
+
+ "extra-info" Nickname Fingerprint NL
+ [At start, exactly once.]
+
+ Identifies what router this is an extra info descriptor for.
+ Fingerprint is encoded in hex (using upper-case letters), with
+ no spaces.
+
+ "published"
+
+ [Exactly once.]
+
+ The time, in GMT, when this document (and its corresponding router
+ descriptor if any) was generated. It MUST match the published time
+ in the corresponding router descriptor.
+
+ "read-history" YYYY-MM-DD HH:MM:SS (NSEC s) NUM,NUM,NUM,NUM,NUM... NL
+ [At most once.]
+ "write-history" YYYY-MM-DD HH:MM:SS (NSEC s) NUM,NUM,NUM,NUM,NUM... NL
+ [At most once.]
+
+ As documented in 2.1 above. See migration notes in section 2.2.1.
+
+ "geoip-start" YYYY-MM-DD HH:MM:SS NL
+ "geoip-client-origins" CC=N,CC=N,... NL
+
+ Only generated by bridge routers (see blocking.pdf), and only
+ when they have been configured with a geoip database.
+ Non-bridges SHOULD NOT generate these fields. Contains a list
+ of mappings from two-letter country codes (CC) to the number
+ of clients that have connected to that bridge from that
+ country (approximate, and rounded up to the nearest multiple of 8
+ in order to hamper traffic analysis). A country is included
+ only if it has at least one address. The time in
+ "geoip-start" is the time at which we began collecting geoip
+ statistics.
+
+ "dirreq-stats-end" YYYY-MM-DD HH:MM:SS (NSEC s) NL
+ [At most once.]
+
+ YYYY-MM-DD HH:MM:SS defines the end of the included measurement
+ interval of length NSEC seconds (86400 seconds by default).
+
+ A "dirreq-stats-end" line, as well as any other "dirreq-*" line,
+ is only added when the relay has opened its Dir port and after 24
+ hours of measuring directory requests.
+
+ "dirreq-v2-ips" CC=N,CC=N,... NL
+ [At most once.]
+ "dirreq-v3-ips" CC=N,CC=N,... NL
+ [At most once.]
+
+ List of mappings from two-letter country codes to the number of
+ unique IP addresses that have connected from that country to
+ request a v2/v3 network status, rounded up to the nearest multiple
+ of 8. Only those IP addresses are counted that the directory can
+ answer with a 200 OK status code.
+
+ "dirreq-v2-reqs" CC=N,CC=N,... NL
+ [At most once.]
+ "dirreq-v3-reqs" CC=N,CC=N,... NL
+ [At most once.]
+
+ List of mappings from two-letter country codes to the number of
+ requests for v2/v3 network statuses from that country, rounded up
+ to the nearest multiple of 8. Only those requests are counted that
+ the directory can answer with a 200 OK status code.
+
+ "dirreq-v2-share" num% NL
+ [At most once.]
+ "dirreq-v3-share" num% NL
+ [At most once.]
+
+ The share of v2/v3 network status requests that the directory
+ expects to receive from clients based on its advertised bandwidth
+ compared to the overall network bandwidth capacity. Shares are
+ formatted in percent with two decimal places. Shares are
+ calculated as means over the whole 24-hour interval.
+
+ "dirreq-v2-resp" status=num,... NL
+ [At most once.]
+ "dirreq-v3-resp" status=num,... NL
+ [At most once.]
+
+ List of mappings from response statuses to the number of requests
+ for v2/v3 network statuses that were answered with that response
+ status, rounded up to the nearest multiple of 4. Only response
+ statuses with at least 1 response are reported. New response
+ statuses can be added at any time. The current list of response
+ statuses is as follows:
+
+ "ok": a network status request is answered; this number
+ corresponds to the sum of all requests as reported in
+ "dirreq-v2-reqs" or "dirreq-v3-reqs", respectively, before
+ rounding up.
+ "not-enough-sigs": a version 3 network status is not signed by a
+ sufficient number of requested authorities.
+ "unavailable": a requested network status object is unavailable.
+ "not-found": a requested network status is not found.
+ "not-modified": a network status has not been modified since the
+ If-Modified-Since time that is included in the request.
+ "busy": the directory is busy.
+
+ "dirreq-v2-direct-dl" key=val,... NL
+ [At most once.]
+ "dirreq-v3-direct-dl" key=val,... NL
+ [At most once.]
+ "dirreq-v2-tunneled-dl" key=val,... NL
+ [At most once.]
+ "dirreq-v3-tunneled-dl" key=val,... NL
+ [At most once.]
+
+ List of statistics about possible failures in the download process
+ of v2/v3 network statuses. Requests are either "direct"
+ HTTP-encoded requests over the relay's directory port, or
+ "tunneled" requests using a BEGIN_DIR cell over the relay's OR
+ port. The list of possible statistics can change, and statistics
+ can be left out from reporting. The current list of statistics is
+ as follows:
+
+ Successful downloads and failures:
+
+ "complete": a client has finished the download successfully.
+ "timeout": a download did not finish within 10 minutes after
+ starting to send the response.
+ "running": a download is still running at the end of the
+ measurement period for less than 10 minutes after starting to
+ send the response.
+
+ Download times:
+
+ "min", "max": smallest and largest measured bandwidth in B/s.
+ "d[1-4,6-9]": 1st to 4th and 6th to 9th decile of measured
+ bandwidth in B/s. For a given decile i, i/10 of all downloads
+ had a smaller bandwidth than di, and (10-i)/10 of all downloads
+ had a larger bandwidth than di.
+ "q[1,3]": 1st and 3rd quartile of measured bandwidth in B/s. One
+ fourth of all downloads had a smaller bandwidth than q1, one
+ fourth of all downloads had a larger bandwidth than q3, and the
+ remaining half of all downloads had a bandwidth between q1 and
+ q3.
+ "md": median of measured bandwidth in B/s. Half of the downloads
+ had a smaller bandwidth than md, the other half had a larger
+ bandwidth than md.
+
+ "entry-stats-end" YYYY-MM-DD HH:MM:SS (NSEC s) NL
+ [At most once.]
+
+ YYYY-MM-DD HH:MM:SS defines the end of the included measurement
+ interval of length NSEC seconds (86400 seconds by default).
+
+ An "entry-stats-end" line, as well as any other "entry-*"
+ line, is first added after the relay has been running for at least
+ 24 hours.
+
+ "entry-ips" CC=N,CC=N,... NL
+ [At most once.]
+
+ List of mappings from two-letter country codes to the number of
+ unique IP addresses that have connected from that country to the
+ relay and that are not known to be other relays, rounded up to the
+ nearest multiple of 8.
+
+ "cell-stats-end" YYYY-MM-DD HH:MM:SS (NSEC s) NL
+ [At most once.]
+
+ YYYY-MM-DD HH:MM:SS defines the end of the included measurement
+ interval of length NSEC seconds (86400 seconds by default).
+
+ A "cell-stats-end" line, as well as any other "cell-*" line,
+ is first added after the relay has been running for at least 24
+ hours.
+
+ "cell-processed-cells" num,...,num NL
+ [At most once.]
+
+ Mean number of processed cells per circuit, subdivided into
+ deciles of circuits by the number of cells they have processed in
+ descending order from loudest to quietest circuits.
+
+ "cell-queued-cells" num,...,num NL
+ [At most once.]
+
+ Mean number of cells contained in queues by circuit decile. These
+ means are calculated by 1) determining the mean number of cells in
+ a single circuit between its creation and its termination and 2)
+ calculating the mean for all circuits in a given decile as
+ determined in "cell-processed-cells". Numbers have a precision of
+ two decimal places.
+
+ "cell-time-in-queue" num,...,num NL
+ [At most once.]
+
+ Mean time cells spend in circuit queues in milliseconds. Times are
+ calculated by 1) determining the mean time cells spend in the
+ queue of a single circuit and 2) calculating the mean for all
+ circuits in a given decile as determined in
+ "cell-processed-cells".
+
+ "cell-circuits-per-decile" num NL
+ [At most once.]
+
+ Mean number of circuits that are included in any of the deciles,
+ rounded up to the next integer.
+
+ "exit-stats-end" YYYY-MM-DD HH:MM:SS (NSEC s) NL
+ [At most once.]
+
+ YYYY-MM-DD HH:MM:SS defines the end of the included measurement
+ interval of length NSEC seconds (86400 seconds by default).
+
+ An "exit-stats-end" line, as well as any other "exit-*" line, is
+ first added after the relay has been running for at least 24 hours
+ and only if the relay permits exiting (where exiting to a single
+ port and IP address is sufficient).
+
+ "exit-kibibytes-written" port=N,port=N,... NL
+ [At most once.]
+ "exit-kibibytes-read" port=N,port=N,... NL
+ [At most once.]
+
+ List of mappings from ports to the number of kibibytes that the
+ relay has written to or read from exit connections to that port,
+ rounded up to the next full kibibyte.
+
+ "exit-streams-opened" port=N,port=N,... NL
+ [At most once.]
+
+ List of mappings from ports to the number of opened exit streams
+ to that port, rounded up to the nearest multiple of 4.
+
+ "router-signature" NL Signature NL
+ [At end, exactly once.]
+
+ A document signature as documented in section 1.3, using the
+ initial item "extra-info" and the final item "router-signature",
+ signed with the router's identity key.
+
+2.2.1. Moving history fields to extra-info documents.
+
+ Tools that want to use the read-history and write-history values SHOULD
+ download extra-info documents as well as router descriptors. Such
+ tools SHOULD accept history values from both sources; if they appear in
+ both documents, the values in the extra-info documents are authoritative.
+
+ New versions of Tor no longer generate router descriptors
+ containing read-history or write-history. Tools should continue to
+ accept read-history and write-history values in router descriptors
+ produced by older versions of Tor until all Tor versions earlier
+ than 0.2.0.x are obsolete.
+
+2.3. Nonterminals in router descriptors
+
+ nickname ::= between 1 and 19 alphanumeric characters ([A-Za-z0-9]),
+ case-insensitive.
+ hexdigest ::= a '$', followed by 40 hexadecimal characters
+ ([A-Fa-f0-9]). [Represents a server by the digest of its identity
+ key.]
+
+ exitpattern ::= addrspec ":" portspec
+ portspec ::= "*" | port | port "-" port
+ port ::= an integer between 1 and 65535, inclusive.
+
+ [Some implementations incorrectly generate ports with value 0.
+ Implementations SHOULD accept this, and SHOULD NOT generate it.
+ Connections to port 0 are never permitted.]
+
+ addrspec ::= "*" | ip4spec | ip6spec
+ ip4spec ::= ip4 | ip4 "/" num_ip4_bits | ip4 "/" ip4mask
+ ip4 ::= an IPv4 address in dotted-quad format
+ ip4mask ::= an IPv4 mask in dotted-quad format
+ num_ip4_bits ::= an integer between 0 and 32
+ ip6spec ::= ip6 | ip6 "/" num_ip6_bits
+ ip6 ::= an IPv6 address, surrounded by square brackets.
+ num_ip6_bits ::= an integer between 0 and 128
+
+ bool ::= "0" | "1"
+
+3. Formats produced by directory authorities.
+
+ Every authority has two keys used in this protocol: a signing key, and
+ an authority identity key. (Authorities also have a router identity
+ key used in their role as a router and by earlier versions of the
+ directory protocol.) The identity key is used from time to time to
+ sign new key certificates using new signing keys; it is very sensitive.
+ The signing key is used to sign key certificates and status documents.
+
+ There are three kinds of documents generated by directory authorities:
+
+ Key certificates
+ Status votes
+ Status consensuses
+
+ Each is discussed below.
+
+3.1. Key certificates
+
+ Key certificates consist of the following items:
+
+ "dir-key-certificate-version" version NL
+
+ [At start, exactly once.]
+
+ Determines the version of the key certificate. MUST be "3" for
+ the protocol described in this document. Implementations MUST
+ reject formats they don't understand.
+
+ "dir-address" IPPort NL
+ [At most once]
+
+ An IP:Port for this authority's directory port.
+
+ "fingerprint" fingerprint NL
+
+ [Exactly once.]
+
+ Hexadecimal encoding without spaces based on the authority's
+ identity key.
+
+ "dir-identity-key" NL a public key in PEM format
+
+ [Exactly once.]
+
+ The long-term authority identity key for this authority. This key
+ SHOULD be at least 2048 bits long; it MUST NOT be shorter than
+ 1024 bits.
+
+ "dir-key-published" YYYY-MM-DD HH:MM:SS NL
+
+ [Exactly once.]
+
+ The time (in GMT) when this document and corresponding key were
+ last generated.
+
+ "dir-key-expires" YYYY-MM-DD HH:MM:SS NL
+
+ [Exactly once.]
+
+ A time (in GMT) after which this key is no longer valid.
+
+ "dir-signing-key" NL a key in PEM format
+
+ [Exactly once.]
+
+ The directory server's public signing key. This key MUST be at
+ least 1024 bits, and MAY be longer.
+
+ "dir-key-crosscert" NL CrossSignature NL
+
+ [At most once.]
+
+ NOTE: Authorities MUST include this field in all newly generated
+ certificates. A future version of this specification will make
+ the field required.
+
+ CrossSignature is a signature, made using the certificate's signing
+ key, of the digest of the PKCS1-padded hash of the certificate's
+ identity key. For backward compatibility with broken versions of the
+ parser, we wrap the base64-encoded signature in -----BEGIN ID
+ SIGNATURE----- and -----END ID SIGNATURE----- tags. Implementations
+ MUST allow the "ID " portion to be omitted, however.
+
+ When encountering a certificate with a dir-key-crosscert entry,
+ implementations MUST verify that the signature is a correct signature
+ of the hash of the identity key using the signing key.
+
+ "dir-key-certification" NL Signature NL
+
+ [At end, exactly once.]
+
+ A document signature as documented in section 1.3, using the
+ initial item "dir-key-certificate-version" and the final item
+ "dir-key-certification", signed with the authority identity key.
+
+ Authorities MUST generate a new signing key and corresponding
+ certificate before the key expires.
+
+3.2. Vote and consensus status documents
+
+ Votes and consensuses are more strictly formatted than other documents
+ in this specification, since different authorities must be able to
+ generate exactly the same consensus given the same set of votes.
+
+ The procedure for deciding when to generate vote and consensus status
+ documents is described in section XXX below.
+
+ Status documents contain a preamble, an authority section, a list of
+ router status entries, and one or more footer signatures, in that order.
+
+ Unlike other formats described above, a SP in these documents must be a
+ single space character (hex 20).
+
+ Some items appear only in votes, and some items appear only in
+ consensuses. Unless specified, items occur in both.
+
+ The preamble contains the following items. They MUST occur in the
+ order given here:
+
+ "network-status-version" SP version NL.
+
+ [At start, exactly once.]
+
+ A document format version. For this specification, the version is
+ "3".
+
+ "vote-status" SP type NL
+
+ [Exactly once.]
+
+ The status MUST be "vote" or "consensus", depending on the type of
+ the document.
+
+ "consensus-methods" SP IntegerList NL
+
+ [Exactly once for votes; does not occur in consensuses.]
+
+ A space-separated list of supported methods for generating
+ consensuses from votes. See section 3.4.1 for details. Method "1"
+ MUST be included.
+
+ "consensus-method" SP Integer NL
+
+ [Exactly once for consensuses; does not occur in votes.]
+
+ See section 3.4.1 for details.
+
+ (Only included when the vote is generated with consensus-method 2 or
+ later.)
+
+ "published" SP YYYY-MM-DD SP HH:MM:SS NL
+
+ [Exactly once for votes; does not occur in consensuses.]
+
+ The publication time for this status document (if a vote).
+
+ "valid-after" SP YYYY-MM-DD SP HH:MM:SS NL
+
+ [Exactly once.]
+
+ The start of the Interval for this vote. Before this time, the
+ consensus document produced from this vote should not be used.
+ See 1.4 for voting timeline information.
+
+ "fresh-until" SP YYYY-MM-DD SP HH:MM:SS NL
+
+ [Exactly once.]
+
+ The time at which the next consensus should be produced; before this
+ time, there is no point in downloading another consensus, since there
+ won't be a new one. See 1.4 for voting timeline information.
+
+ "valid-until" SP YYYY-MM-DD SP HH:MM:SS NL
+
+ [Exactly once.]
+
+ The end of the Interval for this vote. After this time, the
+ consensus produced by this vote should not be used. See 1.4 for
+ voting timeline information.
+
+ "voting-delay" SP VoteSeconds SP DistSeconds NL
+
+ [Exactly once.]
+
+ VoteSeconds is the number of seconds that we will allow to collect
+ votes from all authorities; DistSeconds is the number of seconds
+ we'll allow to collect signatures from all authorities. See 1.4 for
+ voting timeline information.
+
+ "client-versions" SP VersionList NL
+
+ [At most once.]
+
+ A comma-separated list of recommended client versions, in
+ ascending order. If absent, no opinion is held about client
+ versions.
+
+ "server-versions" SP VersionList NL
+
+ [At most once.]
+
+ A comma-separated list of recommended server versions, in
+ ascending order. If absent, no opinion is held about server
+ versions.
+
+ "known-flags" SP FlagList NL
+
+ [Exactly once.]
+
+ A space-separated list of all of the flags that this document
+ might contain. A flag is "known" either because the authority
+ knows about them and might set them (if in a vote), or because
+ enough votes were counted for the consensus for an authoritative
+ opinion to have been formed about their status.
+
+ "params" SP [Parameters] NL
+
+ [At most once]
+
+ Parameter ::= Keyword '=' Int32
+ Int32 ::= A decimal integer between -2147483648 and 2147483647.
+ Parameters ::= Parameter | Parameters SP Parameter
+
+ The parameters list, if present, contains a space-separated list of
+ key-value pairs, sorted in lexical order by their keyword. Each
+ parameter has its own meaning.
+
+ (Only included when the vote is generated with consensus-method 7 or
+ later.)
+
+ The authority section of a vote contains the following items, followed
+ in turn by the authority's current key certificate:
+
+ "dir-source" SP nickname SP identity SP address SP IP SP dirport SP
+ orport NL
+
+ [Exactly once, at start]
+
+ Describes this authority. The nickname is a convenient identifier
+ for the authority. The identity is an uppercase hex fingerprint of
+ the authority's current (v3 authority) identity key. The address is
+ the server's hostname. The IP is the server's current IP address,
+ and dirport is its current directory port. The orport is its current
+ OR port.
+
+ "contact" SP string NL
+
+ [At most once.]
+
+ An arbitrary string describing how to contact the directory
+ server's administrator. Administrators should include at least an
+ email address and a PGP fingerprint.
+
+ "legacy-key" SP FINGERPRINT NL
+
+ [At most once]
+
+ Lists a fingerprint for an obsolete _identity_ key still used
+ by this authority to keep older clients working. This option
+ is used to keep the key around for a little while in case the
+ authorities need to migrate many identity keys at once.
+ (Generally, this would only happen because of a security
+ vulnerability that affected multiple authorities, like the
+ Debian OpenSSL RNG bug of May 2008.)
+
+ The authority section of a consensus contains groups of the following items,
+ in the order given, with one group for each authority that contributed to
+ the consensus, with groups sorted by authority identity digest:
+
+ "dir-source" SP nickname SP identity SP address SP IP SP dirport SP
+ orport NL
+
+ [Exactly once, at start]
+
+ As in the authority section of a vote.
+
+ "contact" SP string NL
+
+ [At most once.]
+
+ As in the authority section of a vote.
+
+ "vote-digest" SP digest NL
+
+ [Exactly once.]
+
+ A digest of the vote from the authority that contributed to this
+ consensus, as signed (that is, not including the signature).
+ (Hex, upper-case.)
+
+ Each router status entry contains the following items. Router status
+ entries are sorted in ascending order by identity digest.
+
+ "r" SP nickname SP identity SP digest SP publication SP IP SP ORPort
+ SP DirPort NL
+
+ [At start, exactly once.]
+
+ "Nickname" is the OR's nickname. "Identity" is a hash of its
+ identity key, encoded in base64, with trailing equals sign(s)
+ removed. "Digest" is a hash of its most recent descriptor as
+ signed (that is, not including the signature), encoded in base64.
+ "Publication" is the
+ publication time of its most recent descriptor, in the form
+ YYYY-MM-DD HH:MM:SS, in GMT. "IP" is its current IP address;
+ ORPort is its current OR port, "DirPort" is its current directory
+ port, or "0" for "none".
+
+ "s" SP Flags NL
+
+ [At most once.]
+
+ A series of space-separated status flags, in alphabetical order.
+ Currently documented flags are:
+
+ "Authority" if the router is a directory authority.
+ "BadExit" if the router is believed to be useless as an exit node
+ (because its ISP censors it, because it is behind a restrictive
+ proxy, or for some similar reason).
+ "BadDirectory" if the router is believed to be useless as a
+ directory cache (because its directory port isn't working,
+ its bandwidth is always throttled, or for some similar
+ reason).
+ "Exit" if the router is more useful for building
+ general-purpose exit circuits than for relay circuits. The
+ path building algorithm uses this flag; see path-spec.txt.
+ "Fast" if the router is suitable for high-bandwidth circuits.
+ "Guard" if the router is suitable for use as an entry guard.
+ "HSDir" if the router is considered a v2 hidden service directory.
+ "Named" if the router's identity-nickname mapping is canonical,
+ and this authority binds names.
+ "Stable" if the router is suitable for long-lived circuits.
+ "Running" if the router is currently usable.
+ "Unnamed" if another router has bound the name used by this
+ router, and this authority binds names.
+ "Valid" if the router has been 'validated'.
+ "V2Dir" if the router implements the v2 directory protocol.
+ "V3Dir" if the router implements this protocol.
+
+ "v" SP version NL
+
+ [At most once.]
+
+ The version of the Tor protocol that this server is running. If
+ the value begins with "Tor" SP, the rest of the string is a Tor
+ version number, and the protocol is "The Tor protocol as supported
+ by the given version of Tor." Otherwise, if the value begins with
+ some other string, Tor has upgraded to a more sophisticated
+ protocol versioning system, and the protocol is "a version of the
+ Tor protocol more recent than any we recognize."
+
+ Directory authorities SHOULD omit version strings they receive from
+ descriptors if they would cause "v" lines to be over 128 characters
+ long.
+
+ "w" SP "Bandwidth=" INT [SP "Measured=" INT] NL
+
+ [At most once.]
+
+ An estimate of the bandwidth of this server, in an arbitrary
+ unit (currently kilobytes per second). Used to weight router
+ selection.
+
+ Additionally, the Measured= keyword is present in votes by
+ participating bandwidth measurement authorities to indicate
+ a measured bandwidth currently produced by measuring stream
+ capacities.
+
+ Other weighting keywords may be added later.
+ Clients MUST ignore keywords they do not recognize.
+
+ "p" SP ("accept" / "reject") SP PortList NL
+
+ [At most once.]
+
+ PortList = PortOrRange
+ PortList = PortList "," PortOrRange
+ PortOrRange = INT "-" INT / INT
+
+ A list of those ports that this router supports (if 'accept')
+ or does not support (if 'reject') for exit to "most
+ addresses".
+
+ The signature section contains the following item, which appears
+ Exactly Once for a vote, and At Least Once for a consensus.
+
+ "directory-signature" SP identity SP signing-key-digest NL Signature
+
+ This is a signature of the status document, with the initial item
+ "network-status-version", and the signature item
+ "directory-signature", using the signing key. (In this case, we take
+ the hash through the _space_ after directory-signature, not the
+ newline: this ensures that all authorities sign the same thing.)
+ "identity" is the hex-encoded digest of the authority identity key of
+ the signing authority, and "signing-key-digest" is the hex-encoded
+ digest of the current authority signing key of the signing authority.
+
+3.3. Deciding how to vote.
+
+ (This section describes how directory authorities choose which status
+ flags to apply to routers, as of Tor 0.2.0.0-alpha-dev. Later directory
+ authorities MAY do things differently, so long as clients keep working
+ well. Clients MUST NOT depend on the exact behaviors in this section.)
+
+ In the below definitions, a router is considered "active" if it is
+ running, valid, and not hibernating.
+
+ "Valid" -- a router is 'Valid' if it is running a version of Tor not
+ known to be broken, and the directory authority has not blacklisted
+ it as suspicious.
+
+ "Named" -- Directory authority administrators may decide to support name
+ binding. If they do, then they must maintain a file of
+ nickname-to-identity-key mappings, and try to keep this file consistent
+ with other directory authorities. If they don't, they act as clients, and
+ report bindings made by other directory authorities (name X is bound to
+ identity Y if at least one binding directory lists it, and no directory
+ binds X to some other Y'.) A router is called 'Named' if the router
+ believes the given name should be bound to the given key.
+
+ Two strategies exist on the current network for deciding on
+ values for the Named flag. In the original version, server
+ operators were asked to send nickname-identity pairs to a
+ mailing list of Naming directory authority operators. The
+ operators were then supposed to add the pairs to their
+ mapping files; in practice, they didn't get to this often.
+
+ Newer Naming authorities run a script that registers routers
+ in their mapping files once the routers have been online at
+ least two weeks, no other router has that nickname, and no
+ other router has wanted the nickname for a month. If a router
+ has not been online for six months, the router is removed.
+
+ "Unnamed" -- Directory authorities that support naming should vote for a
+ router to be 'Unnamed' if its given nickname is mapped to a different
+ identity.
+
+ "Running" -- A router is 'Running' if the authority managed to connect to
+ it successfully within the last 30 minutes.
+
+ "Stable" -- A router is 'Stable' if it is active, and either its Weighted
+ MTBF is at least the median for known active routers or its Weighted MTBF
+ corresponds to at least 7 days. Routers are never called Stable if they are
+ running a version of Tor known to drop circuits stupidly. (0.1.1.10-alpha
+ through 0.1.1.16-rc are stupid this way.)
+
+ To calculate weighted MTBF, compute the weighted mean of the lengths
+ of all intervals when the router was observed to be up, weighting
+ intervals by $\alpha^n$, where $n$ is the amount of time that has
+ passed since the interval ended, and $\alpha$ is chosen so that
+ measurements over approximately one month old no longer influence the
+ weighted MTBF much.
+
+ [XXXX what happens when we have less than 4 days of MTBF info.]
+
+ "Exit" -- A router is called an 'Exit' iff it allows exits to at
+ least two of the ports 80, 443, and 6667 and allows exits to at
+ least one /8 address space.
+
+ "Fast" -- A router is 'Fast' if it is active, and its bandwidth is
+ either in the top 7/8ths for known active routers or at least 100KB/s.
+
+ "Guard" -- A router is a possible 'Guard' if its Weighted Fractional
+ Uptime is at least the median for "familiar" active routers, and if
+ its bandwidth is at least median or at least 250KB/s.
+ If the total bandwidth of active non-BadExit Exit servers is less
+ than one third of the total bandwidth of all active servers, no Exit is
+ listed as a Guard.
+
+ To calculate weighted fractional uptime, compute the fraction
+ of time that the router is up in any given day, weighting so that
+ downtime and uptime in the past count less.
+
+ A node is 'familiar' if 1/8 of all active nodes have appeared more
+ recently than it, OR it has been around for a few weeks.
+
+ "Authority" -- A router is called an 'Authority' if the authority
+ generating the network-status document believes it is an authority.
+
+ "V2Dir" -- A router supports the v2 directory protocol if it has an open
+ directory port, and it is running a version of the directory protocol that
+ supports the functionality clients need. (Currently, this is
+ 0.1.1.9-alpha or later.)
+
+ "V3Dir" -- A router supports the v3 directory protocol if it has an open
+ directory port, and it is running a version of the directory protocol that
+ supports the functionality clients need. (Currently, this is
+ 0.2.0.?????-alpha or later.)
+
+ "HSDir" -- A router is a v2 hidden service directory if it stores and
+ serves v2 hidden service descriptors and the authority managed to connect
+ to it successfully within the last 24 hours.
+
+ Directory server administrators may label some servers or IPs as
+ blacklisted, and elect not to include them in their network-status lists.
+
+ Authorities SHOULD 'disable' any servers in excess of 3 on any single IP.
+ When there are more than 3 to choose from, authorities should first prefer
+ authorities to non-authorities, then prefer Running to non-Running, and
+ then prefer high-bandwidth to low-bandwidth. To 'disable' a server, the
+ authority *should* advertise it without the Running or Valid flag.
+
+ Thus, the network-status vote includes all non-blacklisted,
+ non-expired, non-superseded descriptors.
+
+ The bandwidth in a "w" line should be taken as the best estimate
+ of the router's actual capacity that the authority has. For now,
+ this should be the lesser of the observed bandwidth and bandwidth
+ rate limit from the router descriptor. It is given in kilobytes
+ per second, and capped at some arbitrary value (currently 10 MB/s).
+
+ The Measured= keyword on a "w" line vote is currently computed
+ by multiplying the previous published consensus bandwidth by the
+ ratio of the measured average node stream capacity to the network
+ average. If 3 or more authorities provide a Measured= keyword for
+ a router, the authorities produce a consensus containing a "w"
+ Bandwidth= keyword equal to the median of the Measured= votes.
+
+ The ports listed in a "p" line should be taken as those ports for
+ which the router's exit policy permits 'most' addresses, ignoring any
+ accept not for all addresses, ignoring all rejects for private
+ netblocks. "Most" addresses are permitted if no more than 2^25
+ IPv4 addresses (two /8 networks) were blocked. The list is encoded
+ as described in 3.4.2.
+
+3.4. Computing a consensus from a set of votes
+
+ Given a set of votes, authorities compute the contents of the consensus
+ document as follows:
+
+ The "valid-after", "valid-until", and "fresh-until" times are taken as
+ the median of the respective values from all the votes.
+
+ The times in the "voting-delay" line are taken as the median of the
+ VoteSeconds and DistSeconds times in the votes.
+
+ Known-flags is the union of all flags known by any voter.
+
+ Entries are given on the "params" line for every keyword on which any
+ authority voted. The values given are the low-median of all votes on
+ that keyword.
+
+ "client-versions" and "server-versions" are sorted in ascending
+ order; A version is recommended in the consensus if it is recommended
+ by more than half of the voting authorities that included a
+ client-versions or server-versions lines in their votes.
+
+ The authority item groups (dir-source, contact, fingerprint,
+ vote-digest) are taken from the votes of the voting
+ authorities. These groups are sorted by the digests of the
+ authorities' identity keys, in ascending order. If the consensus
+ method is 3 or later, a dir-source line must be included for
+ every vote with legacy-key entry, using the legacy-key's
+ fingerprint, the voter's ordinary nickname with the string
+ "-legacy" appended, and all other fields as from the original
+ vote's dir-source line.
+
+ A router status entry:
+ * is included in the result if some router status entry with the same
+ identity is included by more than half of the authorities (total
+ authorities, not just those whose votes we have).
+
+ * For any given identity, we include at most one router status entry.
+
+ * A router entry has a flag set if that is included by more than half
+ of the authorities who care about that flag.
+
+ * Two router entries are "the same" if they have the same
+ <descriptor digest, published time, nickname, IP, ports> tuple.
+ We choose the tuple for a given router as whichever tuple appears
+ for that router in the most votes. We break ties first in favor of
+ the more recently published, then in favor of smaller server
+ descriptor digest.
+
+ * The Named flag appears if it is included for this routerstatus by
+ _any_ authority, and if all authorities that list it list the same
+ nickname. However, if consensus-method 2 or later is in use, and
+ any authority calls this identity/nickname pair Unnamed, then
+ this routerstatus does not get the Named flag.
+
+ * If consensus-method 2 or later is in use, the Unnamed flag is
+ set for a routerstatus if any authorities have voted for a different
+ identity to be Named with that nickname, or if any authority
+ lists that nickname/ID pair as Unnamed.
+
+ (With consensus-method 1, Unnamed is set like any other flag.)
+
+ * The version is given as whichever version is listed by the most
+ voters, with ties decided in favor of more recent versions.
+
+ * If consensus-method 4 or later is in use, then routers that
+ do not have the Running flag are not listed at all.
+
+ * If consensus-method 5 or later is in use, then the "w" line
+ is generated using a low-median of the bandwidth values from
+ the votes that included "w" lines for this router.
+
+ * If consensus-method 5 or later is in use, then the "p" line
+ is taken from the votes that have the same policy summary
+ for the descriptor we are listing. (They should all be the
+ same. If they are not, we pick the most commonly listed
+ one, breaking ties in favor of the lexicographically larger
+ vote.) The port list is encoded as specified in 3.4.2.
+
+ * If consensus-method 6 or later is in use and if 3 or more
+ authorities provide a Measured= keyword in their votes for
+ a router, the authorities produce a consensus containing a
+ Bandwidth= keyword equal to the median of the Measured= votes.
+
+ * If consensus-method 7 or later is in use, the params line is
+ included in the output.
+
+ The signatures at the end of a consensus document are sorted in
+ ascending order by identity digest.
+
+ All ties in computing medians are broken in favor of the smaller or
+ earlier item.
+
+3.4.1. Forward compatibility
+
+ Future versions of Tor will need to include new information in the
+ consensus documents, but it is important that all authorities (or at least
+ half) generate and sign the same signed consensus.
+
+ To achieve this, authorities list in their votes their supported methods
+ for generating consensuses from votes. Later methods will be assigned
+ higher numbers. Currently recognized methods:
+ "1" -- The first implemented version.
+ "2" -- Added support for the Unnamed flag.
+ "3" -- Added legacy ID key support to aid in authority ID key rollovers
+ "4" -- No longer list routers that are not running in the consensus
+ "5" -- adds support for "w" and "p" lines.
+ "6" -- Prefers measured bandwidth values rather than advertised
+
+ Before generating a consensus, an authority must decide which consensus
+ method to use. To do this, it looks for the highest version number
+ supported by more than 2/3 of the authorities voting. If it supports this
+ method, then it uses it. Otherwise, it falls back to method 1.
+
+ (The consensuses generated by new methods must be parsable by
+ implementations that only understand the old methods, and must not cause
+ those implementations to compromise their anonymity. This is a means for
+ making changes in the contents of consensus; not for making
+ backward-incompatible changes in their format.)
+
+3.4.2. Encoding port lists
+
+ Whether the summary shows the list of accepted ports or the list of
+ rejected ports depends on which list is shorter (has a shorter string
+ representation). In case of ties we choose the list of accepted
+ ports. As an exception to this rule an allow-all policy is
+ represented as "accept 1-65535" instead of "reject " and a reject-all
+ policy is similarly given as "reject 1-65535".
+
+ Summary items are compressed, that is instead of "80-88,89-100" there
+ only is a single item of "80-100", similarly instead of "20,21" a
+ summary will say "20-21".
+
+ Port lists are sorted in ascending order.
+
+ The maximum allowed length of a policy summary (including the "accept "
+ or "reject ") is 1000 characters. If a summary exceeds that length we
+ use an accept-style summary and list as much of the port list as is
+ possible within these 1000 bytes. [XXXX be more specific.]
+
+3.5. Detached signatures
+
+ Assuming full connectivity, every authority should compute and sign the
+ same consensus directory in each period. Therefore, it isn't necessary to
+ download the consensus computed by each authority; instead, the
+ authorities only push/fetch each others' signatures. A "detached
+ signature" document contains items as follows:
+
+ "consensus-digest" SP Digest NL
+
+ [At start, at most once.]
+
+ The digest of the consensus being signed.
+
+ "valid-after" SP YYYY-MM-DD SP HH:MM:SS NL
+ "fresh-until" SP YYYY-MM-DD SP HH:MM:SS NL
+ "valid-until" SP YYYY-MM-DD SP HH:MM:SS NL
+
+ [As in the consensus]
+
+ "directory-signature"
+
+ [As in the consensus; the signature object is the same as in the
+ consensus document.]
+
+
+4. Directory server operation
+
+ All directory authorities and directory caches ("directory servers")
+ implement this section, except as noted.
+
+4.1. Accepting uploads (authorities only)
+
+ When a router posts a signed descriptor to a directory authority, the
+ authority first checks whether it is well-formed and correctly
+ self-signed. If it is, the authority next verifies that the nickname
+ in question is not already assigned to a router with a different
+ public key.
+ Finally, the authority MAY check that the router is not blacklisted
+ because of its key, IP, or another reason.
+
+ If the descriptor passes these tests, and the authority does not already
+ have a descriptor for a router with this public key, it accepts the
+ descriptor and remembers it.
+
+ If the authority _does_ have a descriptor with the same public key, the
+ newly uploaded descriptor is remembered if its publication time is more
+ recent than the most recent old descriptor for that router, and either:
+ - There are non-cosmetic differences between the old descriptor and the
+ new one.
+ - Enough time has passed between the descriptors' publication times.
+ (Currently, 12 hours.)
+
+ Differences between router descriptors are "non-cosmetic" if they would be
+ sufficient to force an upload as described in section 2 above.
+
+ Note that the "cosmetic difference" test only applies to uploaded
+ descriptors, not to descriptors that the authority downloads from other
+ authorities.
+
+ When a router posts a signed extra-info document to a directory authority,
+ the authority again checks it for well-formedness and correct signature,
+ and checks that its digest matches the extra-info-digest in some router
+ descriptor that it believes is currently useful. If so, it accepts it and
+ stores it and serves it as requested. If not, it drops it.
+
+4.2. Voting (authorities only)
+
+ Authorities divide time into Intervals. Authority administrators SHOULD
+ try to all pick the same interval length, and SHOULD pick intervals that
+ are commonly used divisions of time (e.g., 5 minutes, 15 minutes, 30
+ minutes, 60 minutes, 90 minutes). Voting intervals SHOULD be chosen to
+ divide evenly into a 24-hour day.
+
+ Authorities SHOULD act according to interval and delays in the
+ latest consensus. Lacking a latest consensus, they SHOULD default to a
+ 30-minute Interval, a 5 minute VotingDelay, and a 5 minute DistDelay.
+
+ Authorities MUST take pains to ensure that their clocks remain accurate
+ within a few seconds. (Running NTP is usually sufficient.)
+
+ The first voting period of each day begins at 00:00 (midnight) GMT. If
+ the last period of the day would be truncated by one-half or more, it is
+ merged with the second-to-last period.
+
+ An authority SHOULD publish its vote immediately at the start of each voting
+ period (minus VoteSeconds+DistSeconds). It does this by making it
+ available at
+ http://<hostname>/tor/status-vote/next/authority.z
+ and sending it in an HTTP POST request to each other authority at the URL
+ http://<hostname>/tor/post/vote
+
+ If, at the start of the voting period, minus DistSeconds, an authority
+ does not have a current statement from another authority, the first
+ authority downloads the other's statement.
+
+ Once an authority has a vote from another authority, it makes it available
+ at
+ http://<hostname>/tor/status-vote/next/<fp>.z
+ where <fp> is the fingerprint of the other authority's identity key.
+ And at
+ http://<hostname>/tor/status-vote/next/d/<d>.z
+ where <d> is the digest of the vote document.
+
+ The consensus status, along with as many signatures as the server
+ currently knows, should be available at
+ http://<hostname>/tor/status-vote/next/consensus.z
+ All of the detached signatures it knows for consensus status should be
+ available at:
+ http://<hostname>/tor/status-vote/next/consensus-signatures.z
+
+ Once there are enough signatures, or once the voting period starts,
+ these documents are available at
+ http://<hostname>/tor/status-vote/current/consensus.z
+ and
+ http://<hostname>/tor/status-vote/current/consensus-signatures.z
+ [XXX current/consensus-signatures is not currently implemented, as it
+ is not used in the voting protocol.]
+
+ The other vote documents are analogously made available under
+ http://<hostname>/tor/status-vote/current/authority.z
+ http://<hostname>/tor/status-vote/current/<fp>.z
+ http://<hostname>/tor/status-vote/current/d/<d>.z
+ once the consensus is complete.
+
+ Once an authority has computed and signed a consensus network status, it
+ should send its detached signature to each other authority in an HTTP POST
+ request to the URL:
+ http://<hostname>/tor/post/consensus-signature
+
+ [XXX Note why we support push-and-then-pull.]
+
+ [XXX possible future features include support for downloading old
+ consensuses.]
+
+4.3. Downloading consensus status documents (caches only)
+
+ All directory servers (authorities and caches) try to keep a recent
+ network-status consensus document to serve to clients. A cache ALWAYS
+ downloads a network-status consensus if any of the following are true:
+ - The cache has no consensus document.
+ - The cache's consensus document is no longer valid.
+ Otherwise, the cache downloads a new consensus document at a randomly
+ chosen time in the first half-interval after its current consensus
+ stops being fresh. (This time is chosen at random to avoid swarming
+ the authorities at the start of each period. The interval size is
+ inferred from the difference between the valid-after time and the
+ fresh-until time on the consensus.)
+
+ [For example, if a cache has a consensus that became valid at 1:00,
+ and is fresh until 2:00, that cache will fetch a new consensus at
+ a random time between 2:00 and 2:30.]
+
+4.4. Downloading and storing router descriptors (authorities and caches)
+
+ Periodically (currently, every 10 seconds), directory servers check
+ whether there are any specific descriptors that they do not have and that
+ they are not currently trying to download. Caches identify these
+ descriptors by hash in the recent network-status consensus documents;
+ authorities identify them by hash in vote (if publication date is more
+ recent than the descriptor we currently have).
+
+ [XXXX need a way to fetch descriptors ahead of the vote? v2 status docs can
+ do that for now.]
+
+ If so, the directory server launches requests to the authorities for these
+ descriptors, such that each authority is only asked for descriptors listed
+ in its most recent vote (if the requester is an authority) or in the
+ consensus (if the requester is a cache). If we're an authority, and more
+ than one authority lists the descriptor, we choose which to ask at random.
+
+ If one of these downloads fails, we do not try to download that descriptor
+ from the authority that failed to serve it again unless we receive a newer
+ network-status (consensus or vote) from that authority that lists the same
+ descriptor.
+
+ Directory servers must potentially cache multiple descriptors for each
+ router. Servers must not discard any descriptor listed by any recent
+ consensus. If there is enough space to store additional descriptors,
+ servers SHOULD try to hold those which clients are likely to download the
+ most. (Currently, this is judged based on the interval for which each
+ descriptor seemed newest.)
+[XXXX define recent]
+
+ Authorities SHOULD NOT download descriptors for routers that they would
+ immediately reject for reasons listed in 3.1.
+
+4.5. Downloading and storing extra-info documents
+
+ All authorities, and any cache that chooses to cache extra-info documents,
+ and any client that uses extra-info documents, should implement this
+ section.
+
+ Note that generally, clients don't need extra-info documents.
+
+ Periodically, the Tor instance checks whether it is missing any extra-info
+ documents: in other words, if it has any router descriptors with an
+ extra-info-digest field that does not match any of the extra-info
+ documents currently held. If so, it downloads whatever extra-info
+ documents are missing. Caches download from authorities; non-caches try
+ to download from caches. We follow the same splitting and back-off rules
+ as in 4.4 (if a cache) or 5.3 (if a client).
+
+4.6. General-use HTTP URLs
+
+ "Fingerprints" in these URLs are base-16-encoded SHA1 hashes.
+
+ The most recent v3 consensus should be available at:
+ http://<hostname>/tor/status-vote/current/consensus.z
+
+ Starting with Tor version 0.2.1.1-alpha, it is also available at:
+ http://<hostname>/tor/status-vote/current/consensus/<F1>+<F2>+<F3>.z
+
+ Where F1, F2, etc. are authority identity fingerprints the client trusts.
+ Servers will only return a consensus if more than half of the requested
+ authorities have signed the document, otherwise a 404 error will be sent
+ back. The fingerprints can be shortened to a length of any multiple of
+ two, using only the leftmost part of the encoded fingerprint. Tor uses
+ 3 bytes (6 hex characters) of the fingerprint.
+
+ Clients SHOULD sort the fingerprints in ascending order. Servers MUST
+ accept any order.
+
+ Clients SHOULD use this format when requesting consensus documents from
+ directory authority servers and from caches running a version of Tor
+ that is known to support this URL format.
+
+ A concatenated set of all the current key certificates should be available
+ at:
+ http://<hostname>/tor/keys/all.z
+
+ The key certificate for this server (if it is an authority) should be
+ available at:
+ http://<hostname>/tor/keys/authority.z
+
+ The key certificate for an authority whose authority identity fingerprint
+ is <F> should be available at:
+ http://<hostname>/tor/keys/fp/<F>.z
+
+ The key certificate whose signing key fingerprint is <F> should be
+ available at:
+ http://<hostname>/tor/keys/sk/<F>.z
+
+ The key certificate whose identity key fingerprint is <F> and whose signing
+ key fingerprint is <S> should be available at:
+
+ http://<hostname>/tor/keys/fp-sk/<F>-<S>.z
+
+ (As usual, clients may request multiple certificates using:
+ http://<hostname>/tor/keys/fp-sk/<F1>-<S1>+<F2>-<S2>.z )
+ [The above fp-sk format was not supported before Tor 0.2.1.9-alpha.]
+
+ The most recent descriptor for a server whose identity key has a
+ fingerprint of <F> should be available at:
+ http://<hostname>/tor/server/fp/<F>.z
+
+ The most recent descriptors for servers with identity fingerprints
+ <F1>,<F2>,<F3> should be available at:
+ http://<hostname>/tor/server/fp/<F1>+<F2>+<F3>.z
+
+ (NOTE: Implementations SHOULD NOT download descriptors by identity key
+ fingerprint. This allows a corrupted server (in collusion with a cache) to
+ provide a unique descriptor to a client, and thereby partition that client
+ from the rest of the network.)
+
+ The server descriptor with (descriptor) digest <D> (in hex) should be
+ available at:
+ http://<hostname>/tor/server/d/<D>.z
+
+ The most recent descriptors with digests <D1>,<D2>,<D3> should be
+ available at:
+ http://<hostname>/tor/server/d/<D1>+<D2>+<D3>.z
+
+ The most recent descriptor for this server should be at:
+ http://<hostname>/tor/server/authority.z
+ [Nothing in the Tor protocol uses this resource yet, but it is useful
+ for debugging purposes. Also, the official Tor implementations
+ (starting at 0.1.1.x) use this resource to test whether a server's
+ own DirPort is reachable.]
+
+ A concatenated set of the most recent descriptors for all known servers
+ should be available at:
+ http://<hostname>/tor/server/all.z
+
+ Extra-info documents are available at the URLs
+ http://<hostname>/tor/extra/d/...
+ http://<hostname>/tor/extra/fp/...
+ http://<hostname>/tor/extra/all[.z]
+ http://<hostname>/tor/extra/authority[.z]
+ (As for /tor/server/ URLs: supports fetching extra-info
+ documents by their digest, by the fingerprint of their servers,
+ or all at once. When serving by fingerprint, we serve the
+ extra-info that corresponds to the descriptor we would serve by
+ that fingerprint. Only directory authorities of version
+ 0.2.0.1-alpha or later are guaranteed to support the first
+ three classes of URLs. Caches may support them, and MUST
+ support them if they have advertised "caches-extra-info".)
+
+ For debugging, directories SHOULD expose non-compressed objects at URLs like
+ the above, but without the final ".z".
+ Clients MUST handle compressed concatenated information in two forms:
+ - A concatenated list of zlib-compressed objects.
+ - A zlib-compressed concatenated list of objects.
+ Directory servers MAY generate either format: the former requires less
+ CPU, but the latter requires less bandwidth.
+
+ Clients SHOULD use upper case letters (A-F) when base16-encoding
+ fingerprints. Servers MUST accept both upper and lower case fingerprints
+ in requests.
+
+5. Client operation: downloading information
+
+ Every Tor that is not a directory server (that is, those that do
+ not have a DirPort set) implements this section.
+
+5.1. Downloading network-status documents
+
+ Each client maintains a list of directory authorities. Insofar as
+ possible, clients SHOULD all use the same list.
+
+ Clients try to have a live consensus network-status document at all times.
+ A network-status document is "live" if the time in its valid-until field
+ has not passed.
+
+ If a client is missing a live network-status document, it tries to fetch
+ it from a directory cache (or from an authority if it knows no caches).
+ On failure, the client waits briefly, then tries that network-status
+ document again from another cache. The client does not build circuits
+ until it has a live network-status consensus document, and it has
+ descriptors for more than 1/4 of the routers that it believes are running.
+
+ (Note: clients can and should pick caches based on the network-status
+ information they have: once they have first fetched network-status info
+ from an authority, they should not need to go to the authority directly
+ again.)
+
+ To avoid swarming the caches whenever a consensus expires, the
+ clients download new consensuses at a randomly chosen time after the
+ caches are expected to have a fresh consensus, but before their
+ consensus will expire. (This time is chosen uniformly at random from
+ the interval between the time 3/4 into the first interval after the
+ consensus is no longer fresh, and 7/8 of the time remaining after
+ that before the consensus is invalid.)
+
+ [For example, if a client has a consensus that became valid at 1:00,
+ and is fresh until 2:00, and expires at 4:00, that client will fetch
+ a new consensus at a random time between 2:45 and 3:50, since 3/4
+ of the one-hour interval is 45 minutes, and 7/8 of the remaining 75
+ minutes is 65 minutes.]
+
+5.2. Downloading and storing router descriptors
+
+ Clients try to have the best descriptor for each router. A descriptor is
+ "best" if:
+ * It is listed in the consensus network-status document.
+
+ Periodically (currently every 10 seconds) clients check whether there are
+ any "downloadable" descriptors. A descriptor is downloadable if:
+ - It is the "best" descriptor for some router.
+ - The descriptor was published at least 10 minutes in the past.
+ (This prevents clients from trying to fetch descriptors that the
+ mirrors have probably not yet retrieved and cached.)
+ - The client does not currently have it.
+ - The client is not currently trying to download it.
+ - The client would not discard it immediately upon receiving it.
+ - The client thinks it is running and valid (see 6.1 below).
+
+ If at least 16 known routers have downloadable descriptors, or if
+ enough time (currently 10 minutes) has passed since the last time the
+ client tried to download descriptors, it launches requests for all
+ downloadable descriptors, as described in 5.3 below.
+
+ When a descriptor download fails, the client notes it, and does not
+ consider the descriptor downloadable again until a certain amount of time
+ has passed. (Currently 0 seconds for the first failure, 60 seconds for the
+ second, 5 minutes for the third, 10 minutes for the fourth, and 1 day
+ thereafter.) Periodically (currently once an hour) clients reset the
+ failure count.
+
+ Clients retain the most recent descriptor they have downloaded for each
+ router so long as it is not too old (currently, 48 hours), OR so long as
+ no better descriptor has been downloaded for the same router.
+
+ [Versions of Tor before 0.1.2.3-alpha would discard descriptors simply for
+ being published too far in the past.] [The code seems to discard
+ descriptors in all cases after they're 5 days old. True? -RD]
+
+5.3. Managing downloads
+
+ When a client has no consensus network-status document, it downloads it
+ from a randomly chosen authority. In all other cases, the client
+ downloads from caches randomly chosen from among those believed to be V2
+ directory servers. (This information comes from the network-status
+ documents; see 6 below.)
+
+ When downloading multiple router descriptors, the client chooses multiple
+ mirrors so that:
+ - At least 3 different mirrors are used, except when this would result
+ in more than one request for under 4 descriptors.
+ - No more than 128 descriptors are requested from a single mirror.
+ - Otherwise, as few mirrors as possible are used.
+ After choosing mirrors, the client divides the descriptors among them
+ randomly.
+
+ After receiving any response, the client MUST discard any network-status
+ documents and descriptors that it did not request.
+
+6. Using directory information
+
+ Everyone besides directory authorities uses the approaches in this section
+ to decide which servers to use and what their keys are likely to be.
+ (Directory authorities just believe their own opinions, as in 3.1 above.)
+
+6.1. Choosing routers for circuits.
+
+ Circuits SHOULD NOT be built until the client has enough directory
+ information: a live consensus network status [XXXX fallback?] and
+ descriptors for at least 1/4 of the servers believed to be running.
+
+ A server is "listed" if it is included by the consensus network-status
+ document. Clients SHOULD NOT use unlisted servers.
+
+ These flags are used as follows:
+
+ - Clients SHOULD NOT use non-'Valid' or non-'Running' routers unless
+ requested to do so.
+
+ - Clients SHOULD NOT use non-'Fast' routers for any purpose other than
+ very-low-bandwidth circuits (such as introduction circuits).
+
+ - Clients SHOULD NOT use non-'Stable' routers for circuits that are
+ likely to need to be open for a very long time (such as those used for
+ IRC or SSH connections).
+
+ - Clients SHOULD NOT choose non-'Guard' nodes when picking entry guard
+ nodes.
+
+ - Clients SHOULD NOT download directory information from non-'V2Dir'
+ caches.
+
+ See the "path-spec.txt" document for more details.
+
+6.2. Managing naming
+
+ In order to provide human-memorable names for individual server
+ identities, some directory servers bind names to IDs. Clients handle
+ names in two ways:
+
+ When a client encounters a name it has not mapped before:
+
+ If the consensus lists any router with that name as "Named", or if
+ consensus-method 2 or later is in use and the consensus lists any
+ router with that name as having the "Unnamed" flag, then the name is
+ bound. (It's bound to the ID listed in the entry with the Named flag,
+ or to an unknown ID if no name is found.)
+
+ When the user refers to a bound name, the implementation SHOULD provide
+ only the router with ID bound to that name, and no other router, even
+ if the router with the right ID can't be found.
+
+ When a user tries to refer to a non-bound name, the implementation SHOULD
+ warn the user. After warning the user, the implementation MAY use any
+ router that advertises the name.
+
+ Not every router needs a nickname. When a router doesn't configure a
+ nickname, it publishes with the default nickname "Unnamed". Authorities
+ SHOULD NOT ever mark a router with this nickname as Named; client software
+ SHOULD NOT ever use a router in response to a user request for a router
+ called "Unnamed".
+
+6.3. Software versions
+
+ An implementation of Tor SHOULD warn when it has fetched a consensus
+ network-status, and it is running a software version not listed.
+
+6.4. Warning about a router's status.
+
+ If a router tries to publish its descriptor to a Naming authority
+ that has its nickname mapped to another key, the router SHOULD
+ warn the operator that it is either using the wrong key or is using
+ an already claimed nickname.
+
+ If a router has fetched a consensus document, and the
+ authorities do not publish a binding for the router's nickname, the
+ router MAY remind the operator that the chosen nickname is not
+ bound to this key at the authorities, and suggest contacting the
+ authority operators.
+
+ ...
+
+6.5. Router protocol versions
+
+ A client should believe that a router supports a given feature if that
+ feature is supported by the router or protocol versions in more than half
+ of the live networkstatuses' "v" entries for that router. In other words,
+ if the "v" entries for some router are:
+ v Tor 0.0.8pre1 (from authority 1)
+ v Tor 0.1.2.11 (from authority 2)
+ v FutureProtocolDescription 99 (from authority 3)
+ then the client should believe that the router supports any feature
+ supported by 0.1.2.11.
+
+ This is currently equivalent to believing the median declared version for
+ a router in all live networkstatuses.
+
+7. Standards compliance
+
+ All clients and servers MUST support HTTP 1.0. Clients and servers MAY
+ support later versions of HTTP as well.
+
+7.1. HTTP headers
+
+ Servers MAY set the Content-Length: header. Servers SHOULD set
+ Content-Encoding to "deflate" or "identity".
+
+ Servers MAY include an X-Your-Address-Is: header, whose value is the
+ apparent IP address of the client connecting to them (as a dotted quad).
+ For directory connections tunneled over a BEGIN_DIR stream, servers SHOULD
+ report the IP from which the circuit carrying the BEGIN_DIR stream reached
+ them. [Servers before version 0.1.2.5-alpha reported 127.0.0.1 for all
+ BEGIN_DIR-tunneled connections.]
+
+ Servers SHOULD disable caching of multiple network statuses or multiple
+ router descriptors. Servers MAY enable caching of single descriptors,
+ single network statuses, the list of all router descriptors, a v1
+ directory, or a v1 running routers document. XXX mention times.
+
+7.2. HTTP status codes
+
+ Tor delivers the following status codes. Some were chosen without much
+ thought; other code SHOULD NOT rely on specific status codes yet.
+
+ 200 -- the operation completed successfully
+ -- the user requested statuses or serverdescs, and none of the ones we
+ requested were found (0.2.0.4-alpha and earlier).
+
+ 304 -- the client specified an if-modified-since time, and none of the
+ requested resources have changed since that time.
+
+ 400 -- the request is malformed, or
+ -- the URL is for a malformed variation of one of the URLs we support,
+ or
+ -- the client tried to post to a non-authority, or
+ -- the authority rejected a malformed posted document, or
+
+ 404 -- the requested document was not found.
+ -- the user requested statuses or serverdescs, and none of the ones
+ requested were found (0.2.0.5-alpha and later).
+
+ 503 -- we are declining the request in order to save bandwidth
+ -- user requested some items that we ordinarily generate or store,
+ but we do not have any available.
+
+9. Backward compatibility and migration plans
+
+ Until Tor versions before 0.1.1.x are completely obsolete, directory
+ authorities should generate, and mirrors should download and cache, v1
+ directories and running-routers lists, and allow old clients to download
+ them. These documents and the rules for retrieving, serving, and caching
+ them are described in dir-spec-v1.txt.
+
+ Until Tor versions before 0.2.0.x are completely obsolete, directory
+ authorities should generate, mirrors should download and cache, v2
+ network-status documents, and allow old clients to download them.
+ Additionally, all directory servers and caches should download, store, and
+ serve any router descriptor that is required because of v2 network-status
+ documents. These documents and the rules for retrieving, serving, and
+ caching them are described in dir-spec-v2.txt.
+
+A. Consensus-negotiation timeline.
+
+
+ Period begins: this is the Published time.
+ Everybody sends votes
+ Reconciliation: everybody tries to fetch missing votes.
+ consensus may exist at this point.
+ End of voting period:
+ everyone swaps signatures.
+ Now it's okay for caches to download
+ Now it's okay for clients to download.
+
+ Valid-after/valid-until switchover
+
diff --git a/orchid/doc/spec/path-spec.txt b/orchid/doc/spec/path-spec.txt
new file mode 100644
index 00000000..78f3b63b
--- /dev/null
+++ b/orchid/doc/spec/path-spec.txt
@@ -0,0 +1,437 @@
+
+ Tor Path Specification
+
+ Roger Dingledine
+ Nick Mathewson
+
+Note: This is an attempt to specify Tor as currently implemented. Future
+versions of Tor will implement improved algorithms.
+
+This document tries to cover how Tor chooses to build circuits and assign
+streams to circuits. Other implementations MAY take other approaches, but
+implementors should be aware of the anonymity and load-balancing implications
+of their choices.
+
+ THIS SPEC ISN'T DONE YET.
+
+1. General operation
+
+ Tor begins building circuits as soon as it has enough directory
+ information to do so (see section 5 of dir-spec.txt). Some circuits are
+ built preemptively because we expect to need them later (for user
+ traffic), and some are built because of immediate need (for user traffic
+ that no current circuit can handle, for testing the network or our
+ reachability, and so on).
+
+ When a client application creates a new stream (by opening a SOCKS
+ connection or launching a resolve request), we attach it to an appropriate
+ open circuit if one exists, or wait if an appropriate circuit is
+ in-progress. We launch a new circuit only
+ if no current circuit can handle the request. We rotate circuits over
+ time to avoid some profiling attacks.
+
+ To build a circuit, we choose all the nodes we want to use, and then
+ construct the circuit. Sometimes, when we want a circuit that ends at a
+ given hop, and we have an appropriate unused circuit, we "cannibalize" the
+ existing circuit and extend it to the new terminus.
+
+ These processes are described in more detail below.
+
+ This document describes Tor's automatic path selection logic only; path
+ selection can be overridden by a controller (with the EXTENDCIRCUIT and
+ ATTACHSTREAM commands). Paths constructed through these means may
+ violate some constraints given below.
+
+1.1. Terminology
+
+ A "path" is an ordered sequence of nodes, not yet built as a circuit.
+
+ A "clean" circuit is one that has not yet been used for any traffic.
+
+ A "fast" or "stable" or "valid" node is one that has the 'Fast' or
+ 'Stable' or 'Valid' flag
+ set respectively, based on our current directory information. A "fast"
+ or "stable" circuit is one consisting only of "fast" or "stable" nodes.
+
+ In an "exit" circuit, the final node is chosen based on waiting stream
+ requests if any, and in any case it avoids nodes with exit policy of
+ "reject *:*". An "internal" circuit, on the other hand, is one where
+ the final node is chosen just like a middle node (ignoring its exit
+ policy).
+
+ A "request" is a client-side stream or DNS resolve that needs to be
+ served by a circuit.
+
+ A "pending" circuit is one that we have started to build, but which has
+ not yet completed.
+
+ A circuit or path "supports" a request if it is okay to use the
+ circuit/path to fulfill the request, according to the rules given below.
+ A circuit or path "might support" a request if some aspect of the request
+ is unknown (usually its target IP), but we believe the path probably
+ supports the request according to the rules given below.
+
+1.2. A server's bandwidth
+
+ Old versions of Tor did not report bandwidths in network status
+ documents, so clients had to learn them from the routers' advertised
+ server descriptors.
+
+ For versions of Tor prior to 0.2.1.17-rc, everywhere below where we
+ refer to a server's "bandwidth", we mean its clipped advertised
+ bandwidth, computed by taking the smaller of the 'rate' and
+ 'observed' arguments to the "bandwidth" element in the server's
+ descriptor. If a router's advertised bandwidth is greater than
+ MAX_BELIEVABLE_BANDWIDTH (currently 10 MB/s), we clip it to that
+ value.
+
+ For more recent versions of Tor, we take the bandwidth value declared
+ in the consensus, and fall back to the clipped advertised bandwidth
+ only if the consensus does not have bandwidths listed.
+
+2. Building circuits
+
+2.1. When we build
+
+2.1.1. Clients build circuits preemptively
+
+ When running as a client, Tor tries to maintain at least a certain
+ number of clean circuits, so that new streams can be handled
+ quickly. To increase the likelihood of success, Tor tries to
+ predict what circuits will be useful by choosing from among nodes
+ that support the ports we have used in the recent past (by default
+ one hour). Specifically, on startup Tor tries to maintain one clean
+ fast exit circuit that allows connections to port 80, and at least
+ two fast clean stable internal circuits in case we get a resolve
+ request or hidden service request (at least three if we _run_ a
+ hidden service).
+
+ After that, Tor will adapt the circuits that it preemptively builds
+ based on the requests it sees from the user: it tries to have two fast
+ clean exit circuits available for every port seen within the past hour
+ (each circuit can be adequate for many predicted ports -- it doesn't
+ need two separate circuits for each port), and it tries to have the
+ above internal circuits available if we've seen resolves or hidden
+ service activity within the past hour. If there are 12 or more clean
+ circuits open, it doesn't open more even if it has more predictions.
+
+ Only stable circuits can "cover" a port that is listed in the
+ LongLivedPorts config option. Similarly, hidden service requests
+ to ports listed in LongLivedPorts make us create stable internal
+ circuits.
+
+ Note that if there are no requests from the user for an hour, Tor
+ will predict no use and build no preemptive circuits.
+
+ The Tor client SHOULD NOT store its list of predicted requests to a
+ persistent medium.
+
+2.1.2. Clients build circuits on demand
+
+ Additionally, when a client request exists that no circuit (built or
+ pending) might support, we create a new circuit to support the request.
+ For exit connections, we pick an exit node that will handle the
+ most pending requests (choosing arbitrarily among ties), launch a
+ circuit to end there, and repeat until every unattached request
+ might be supported by a pending or built circuit. For internal
+ circuits, we pick an arbitrary acceptable path, repeating as needed.
+
+ In some cases we can reuse an already established circuit if it's
+ clean; see Section 2.3 (cannibalizing circuits) for details.
+
+2.1.3. Servers build circuits for testing reachability and bandwidth
+
+ Tor servers test reachability of their ORPort once they have
+ successfully built a circuit (on start and whenever their IP address
+ changes). They build an ordinary fast internal circuit with themselves
+ as the last hop. As soon as any testing circuit succeeds, the Tor
+ server decides it's reachable and is willing to publish a descriptor.
+
+ We launch multiple testing circuits (one at a time), until we
+ have NUM_PARALLEL_TESTING_CIRC (4) such circuits open. Then we
+ do a "bandwidth test" by sending a certain number of relay drop
+ cells down each circuit: BandwidthRate * 10 / CELL_NETWORK_SIZE
+ total cells divided across the four circuits, but never more than
+ CIRCWINDOW_START (1000) cells total. This exercises both outgoing and
+ incoming bandwidth, and helps to jumpstart the observed bandwidth
+ (see dir-spec.txt).
+
+ Tor servers also test reachability of their DirPort once they have
+ established a circuit, but they use an ordinary exit circuit for
+ this purpose.
+
+2.1.4. Hidden-service circuits
+
+ See section 4 below.
+
+2.1.5. Rate limiting of failed circuits
+
+ If we fail to build a circuit N times in an X-second period (see Section
+ 2.4 for how this works), we stop building circuits until the X seconds
+ have elapsed.
+ XXXX
+
+2.1.6. When to tear down circuits
+
+ XXXX
+
+2.2. Path selection and constraints
+
+ We choose the path for each new circuit before we build it. We choose the
+ exit node first, followed by the other nodes in the circuit. All paths
+ we generate obey the following constraints:
+ - We do not choose the same router twice for the same path.
+ - We do not choose any router in the same family as another in the same
+ path.
+ - We do not choose more than one router in a given /16 subnet
+ (unless EnforceDistinctSubnets is 0).
+ - We don't choose any non-running or non-valid router unless we have
+ been configured to do so. By default, we are configured to allow
+ non-valid routers in "middle" and "rendezvous" positions.
+ - If we're using Guard nodes, the first node must be a Guard (see
+ section 5 below).
+ - XXXX Choosing the length
+
+ For circuits that do not need to be "fast", when choosing among
+ multiple candidates for a path element, we choose randomly.
+
+ For "fast" circuits, we pick a given router as an exit with probability
+ proportional to its bandwidth.
+
+ For non-exit positions on "fast" circuits, we pick routers as above, but
+ we weight the bandwidth of Exit-flagged nodes depending
+ on the fraction of bandwidth available from non-Exit nodes. Call the
+ total bandwidth for Exit nodes under consideration E,
+ and the total bandwidth for all nodes under
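+ consideration T. If E < T/3, we do not consider Exit-flagged nodes.
+ Otherwise, we weight their bandwidth with the factor (E-T/3)/E. This
+ ensures that bandwidth is evenly distributed over nodes in 3-hop paths.
+
+ Similarly, guard nodes are weighted by the factor (G-T/3)/G, and not
+ considered for non-guard positions if this value is less than 0.
+
+ Additionally, we may be building circuits with one or more requests in
+ mind. Each kind of request puts certain constraints on paths:
+
+ - All service-side introduction circuits and all rendezvous paths
+ should be Stable.
+ - All connection requests for connections that we think will need to
+ stay open a long time require Stable circuits. Currently, Tor decides
+ this by examining the request's target port, and comparing it to a
+ list of "long-lived" ports. (Default: 21, 22, 706, 1863, 5050,
+ 5190, 5222, 5223, 6667, 6697, 8300.)
+ - DNS resolves require an exit node whose exit policy is not equivalent
+ to "reject *:*".
+ - Reverse DNS resolves require a version of Tor with advertised eventdns
+ support (available in Tor 0.1.2.1-alpha-dev and later).
+ - All connection requests require an exit node whose exit policy
+ supports their target address and port (if known), or which "might
+ support it" (if the address isn't known). See 2.2.1.
+ - Rules for Fast? XXXXX
+
+2.2.1. Choosing an exit
+
+ If we know what IP address we want to connect to or resolve, we can
+ trivially tell whether a given router will support it by simulating
+ its declared exit policy.
+
+ Because we often connect to addresses of the form hostname:port, we do not
+ always know the target IP address when we select an exit node. In these
+ cases, we need to pick an exit node that "might support" connections to a
+ given address port with an unknown address. An exit node "might support"
+ such a connection if any clause that accepts any connections to that port
+ precedes all clauses (if any) that reject all connections to that port.
+
+ Unless requested to do so by the user, we never choose an exit server
+ flagged as "BadExit" by more than half of the authorities who advertise
+ themselves as listing bad exits.
+
+2.2.2. User configuration
+
+ Users can alter the default behavior for path selection with configuration
+ options.
+
+ - If "ExitNodes" is provided, then every request requires an exit node on
+ the ExitNodes list. (If a request is supported by no nodes on that list,
+ and StrictExitNodes is false, then Tor treats that request as if
+ ExitNodes were not provided.)
+
+ - "EntryNodes" and "StrictEntryNodes" behave analogously.
+
+ - If a user tries to connect to or resolve a hostname of the form
+ <target>.<servername>.exit, the request is rewritten to a request for
+ <target>, and the request is only supported by the exit whose nickname
+ or fingerprint is <servername>.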
+
+2.3. Cannibalizing circuits
+
+ If we need a circuit and have a clean one already established, in
+ some cases we can adapt the clean circuit for our new
+ purpose. Specifically,
+
+ For hidden service interactions, we can "cannibalize" a clean internal
+ circuit if one is available, so we don't need to build those circuits
+ from scratch on demand.
+
+ We can also cannibalize clean circuits when the client asks to exit
+ at a given node -- either via the ".exit" notation or because the
+ destination is running at the same location as an exit node.
+
+
+2.4. Handling failure
+
+ If an attempt to extend a circuit fails (either because the first create
+ failed or a subsequent extend failed) then the circuit is torn down and is
+ no longer pending. (XXXX really?) Requests that might have been
+ supported by the pending circuit thus become unsupported, and a new
+ circuit needs to be constructed.
+
+ If a stream "begin" attempt fails with an EXITPOLICY error, we
+ decide that the exit node's exit policy is not correctly advertised,
+ so we treat the exit node as if it were a non-exit until we retrieve
+ a fresh descriptor for it.
+
+ XXXX
+
+3. Attaching streams to circuits
+
+ When a circuit that might support a request is built, Tor tries to attach
+ the request's stream to the circuit and sends a BEGIN, BEGIN_DIR,
+ or RESOLVE relay
+ cell as appropriate. If the request completes unsuccessfully, Tor
+ considers the reason given in the CLOSE relay cell. [XXX yes, and?]
+
+
+ After a request has remained unattached for SocksTimeout (2 minutes
+ by default), Tor abandons the attempt and signals an error to the
+ client as appropriate (e.g., by closing the SOCKS connection).
+
+ XXX Timeouts and when Tor auto-retries.
+ * What stream-end-reasons are appropriate for retrying.
+
+ If no reply to BEGIN/RESOLVE, then the stream will time out and fail.
+
+4. Hidden-service related circuits
+
+ XXX Tracking expected hidden service use (client-side and hidserv-side)
+
+5. Guard nodes
+
+ We use Guard nodes (also called "helper nodes" in the literature) to
+ prevent certain profiling attacks. Here's the risk: if we choose entry and
+ exit nodes at random, and an attacker controls C out of N servers
+ (ignoring bandwidth), then the
+ attacker will control the entry and exit node of any given circuit with
+ probability (C/N)^2. But as we make many different circuits over time,
+ then the probability that the attacker will see a sample of about (C/N)^2
+ of our traffic goes to 1. Since statistical sampling works, the attacker
+ can be sure of learning a profile of our behavior.
+
+ If, on the other hand, we picked an entry node and held it fixed, we would
+ have probability C/N of choosing a bad entry and being profiled, and
+ probability (N-C)/N of choosing a good entry and not being profiled.
+
+ When guard nodes are enabled, Tor maintains an ordered list of entry nodes
+ as our chosen guards, and stores this list persistently to disk. If a Guard
+ node becomes unusable, rather than replacing it, Tor adds new guards to the
+ end of the list. When choosing the first hop of a circuit, Tor
+ chooses at
+ random from among the first NumEntryGuards (default 3) usable guards on the
+ list. If there are not at least 2 usable guards on the list, Tor adds
+ routers until there are, or until there are no more usable routers to add.
+
+ A guard is unusable if any of the following hold:
+ - it is not marked as a Guard by the networkstatuses,
+ - it is not marked Valid (and the user hasn't set AllowInvalid entry)
+ - it is not marked Running
+ - Tor couldn't reach it the last time it tried to connect
+
+ A guard is unusable for a particular circuit if any of the rules for path
+ selection in 2.2 are not met. In particular, if the circuit is "fast"
+ and the guard is not Fast, or if the circuit is "stable" and the guard is
+ not Stable, or if the guard has already been chosen as the exit node in
+ that circuit, Tor can't use it as a guard node for that circuit.
+
+ If the guard is excluded because of its status in the networkstatuses for
+ over 30 days, Tor removes it from the list entirely, preserving order.
+
+ If Tor fails to connect to an otherwise usable guard, it retries
+ periodically: every hour for six hours, every 4 hours for 3 days, every
+ 18 hours for a week, and every 36 hours thereafter. Additionally, Tor
+ retries unreachable guards the first time it adds a new guard to the list,
+ since it is possible that the old guards were only marked as unreachable
+ because the network was unreachable or down.
+
+ Tor does not add a guard persistently to the list until the first time we
+ have connected to it successfully.
+
+6. Router descriptor purposes
+
+ There are currently three "purposes" supported for router descriptors:
+ general, controller, and bridge. Most descriptors are of type general
+ -- these are the ones listed in the consensus, and the ones fetched
+ and used in normal cases.
+
+ Controller-purpose descriptors are those delivered by the controller
+ and labelled as such: they will be kept around (and expire like
+ normal descriptors), and they can be used by the controller in its
+ EXTENDCIRCUIT commands. Otherwise they are ignored by Tor when it
+ chooses paths.
+
+ Bridge-purpose descriptors are for routers that are used as bridges. See
+ doc/design-paper/blocking.pdf for more design explanation, or proposal
+ 125 for specific details. Currently bridge descriptors are used in place
+ of normal entry guards, for Tor clients that have UseBridges enabled.
+
+
+X. Old notes
+
+X.1. Do we actually do this?
+
+How to deal with network down.
+ - While all helpers are down/unreachable and there are no established
+ or on-the-way testing circuits, launch a testing circuit. (Do this
+ periodically in the same way we try to establish normal circuits
+ when things are working normally.)
+ (Testing circuits are a special type of circuit, that streams won't
+ attach to by accident.)
+ - When a testing circuit succeeds, mark all helpers up and hold
+ the testing circuit open.
+ - If a connection to a helper succeeds, close all testing circuits.
+ Else mark that helper down and try another.
+ - If the last helper is marked down and we already have a testing
+ circuit established, then add the first hop of that testing circuit
+ to the end of our helper node list, close that testing circuit,
+ and go back to square one. (Actually, rather than closing the
+ testing circuit, can we get away with converting it to a normal
+ circuit and beginning to use it immediately?)
+
+ [Do we actually do any of the above? If so, let's spec it. If not, let's
+ remove it. -NM]
+
+X.2. A thing we could do to deal with reachability.
+
+And as a bonus, it leads to an answer to Nick's attack ("If I pick
+my helper nodes all on 18.0.0.0:*, then I move, you'll know where I
+bootstrapped") -- the answer is to pick your original three helper nodes
+without regard for reachability. Then the above algorithm will add some
+more that are reachable for you, and if you move somewhere, it's more
+likely (though not certain) that some of the originals will become useful.
+Is that smart or just complex?
+
+X.3. Some stuff that worries me about entry guards. 2006 Jun, Nickm.
+
+ It is unlikely for two users to have the same set of entry guards.
+ Observing a user is sufficient to learn its entry guards. So, as we move
+ around, entry guards make us linkable. If we want to change guards when
+ our location (IP? subnet?) changes, we have two bad options. We could
+ - Drop the old guards. But if we go back to our old location,
+ we'll not use our old guards. For a laptop that sometimes gets used
+ from work and sometimes from home, this is pretty fatal.
+ - Remember the old guards as associated with the old location, and use
+ them again if we ever go back to the old location. This would be
+ nasty, since it would force us to record where we've been.
+
+ [Do we do any of this now? If not, this should move into 099-misc or
+ 098-todo. -NM]
+
diff --git a/orchid/doc/spec/rend-spec.txt b/orchid/doc/spec/rend-spec.txt
new file mode 100644
index 00000000..f0300926
--- /dev/null
+++ b/orchid/doc/spec/rend-spec.txt
@@ -0,0 +1,751 @@
+
+ Tor Rendezvous Specification
+
+0. Overview and preliminaries
+
+ Read
+ https://www.torproject.org/doc/design-paper/tor-design.html#sec:rendezvous
+ before you read this specification. It will make more sense.
+
+ Rendezvous points provide location-hidden services (server
+ anonymity) for the onion routing network. With rendezvous points,
+ Bob can offer a TCP service (say, a webserver) via the onion
+ routing network, without revealing the IP of that service.
+
+ Bob does this by anonymously advertising a public key for his
+ service, along with a list of onion routers to act as "Introduction
+ Points" for his service. He creates forward circuits to those
+ introduction points, and tells them about his public key. To
+ connect to Bob, Alice first builds a circuit to an OR to act as
+ her "Rendezvous Point." She then connects to one of Bob's chosen
+ introduction points, optionally provides authentication or
+ authorization information, and asks it to tell him about her Rendezvous
+ Point (RP). If Bob chooses to answer, he builds a circuit to her
+ RP, and tells it to connect him to Alice. The RP joins their
+ circuits together, and begins relaying cells. Alice's 'BEGIN'
+ cells are received directly by Bob's OP, which passes data to
+ and from the local server implementing Bob's service.
+
+ Below we describe a network-level specification of this service,
+ along with interfaces to make this process transparent to Alice
+ (so long as she is using an OP).
+
+0.1. Notation, conventions and prerequisites
+
+ In the specifications below, we use the same notation and terminology
+ as in "tor-spec.txt". The service specified here also requires the
+ existence of an onion routing network as specified in that file.
+
+ H(x) is a SHA1 digest of x.
+ PKSign(SK,x) is a PKCS.1-padded RSA signature of x with SK.
+ PKEncrypt(PK,x) is a PKCS.1-padded RSA encryption of x with the public key PK.
+ Public keys are all RSA, and encoded in ASN.1.
+ All integers are stored in network (big-endian) order.
+ All symmetric encryption uses AES in counter mode, except where
+ otherwise noted.
+
+ In all discussions, "Alice" will refer to a user connecting to a
+ location-hidden service, and "Bob" will refer to a user running a
+ location-hidden service.
+
+ An OP is (as defined elsewhere) an "Onion Proxy" or Tor client.
+
+ An OR is (as defined elsewhere) an "Onion Router" or Tor server.
+
+ An "Introduction point" is a Tor server chosen to be Bob's medium-term
+ 'meeting place'. A "Rendezvous point" is a Tor server chosen by Alice to
+ be a short-term communication relay between her and Bob. All Tor servers
+ potentially act as introduction and rendezvous points.
+
+0.2. Protocol outline
+
+ 1. Bob->Bob's OP: "Offer IP:Port as
+ public-key-name:Port". [configuration]
+ (We do not specify this step; it is left to the implementor of
+ Bob's OP.)
+
+ 2. Bob's OP generates keypair and rendezvous service descriptor:
+ "Meet public-key X at introduction point A, B, or C." (signed)
+
+ 3. Bob's OP->Introduction point via Tor: [introduction setup]
+ "This pk is me."
+
+ 4. Bob's OP->directory service via Tor: publishes Bob's service
+ descriptor [advertisement]
+
+ 5. Out of band, Alice receives a [x.y.]z.onion:port address.
+ She opens a SOCKS connection to her OP, and requests
+ x.y.z.onion:port.
+
+ 6. Alice's OP retrieves Bob's descriptor via Tor. [descriptor lookup.]
+
+ 7. Alice's OP chooses a rendezvous point, opens a circuit to that
+ rendezvous point, and establishes a rendezvous circuit. [rendezvous
+ setup.]
+
+ 8. Alice connects to the Introduction point via Tor, and tells it about
+ her rendezvous point and optional authentication/authorization
+ information. (Encrypted to Bob.) [Introduction 1]
+
+ 9. The Introduction point passes this on to Bob's OP via Tor, along the
+ introduction circuit. [Introduction 2]
+
+ 10. Bob's OP decides whether to connect to Alice, and if so, creates a
+ circuit to Alice's RP via Tor. Establishes a shared circuit.
+ [Rendezvous.]
+
+ 11. Alice's OP sends begin cells to Bob's OP. [Connection]
+
+0.3. Constants and new cell types
+
+ Relay cell types
+ 32 -- RELAY_ESTABLISH_INTRO
+ 33 -- RELAY_ESTABLISH_RENDEZVOUS
+ 34 -- RELAY_INTRODUCE1
+ 35 -- RELAY_INTRODUCE2
+ 36 -- RELAY_RENDEZVOUS1
+ 37 -- RELAY_RENDEZVOUS2
+ 38 -- RELAY_INTRO_ESTABLISHED
+ 39 -- RELAY_RENDEZVOUS_ESTABLISHED
+ 40 -- RELAY_COMMAND_INTRODUCE_ACK
+
+0.4. Version overview
+
+ There are several parts in the hidden service protocol that have
+ changed over time, each of them having its own version number, whereas
+ other parts remained the same. The following list of potentially
+ versioned protocol parts should help reduce some confusion:
+
+ - Hidden service descriptor: the binary-based v0 was the default for
+ a long time, and an ascii-based v2 has been added by proposal
+ 114. See 1.2.
+
+ - Hidden service descriptor propagation mechanism: currently related to
+ the hidden service descriptor version -- v0 publishes to the original
+ hs directory authorities, whereas v2 publishes to a rotating subset
+ of relays with the "hsdir" flag; see 1.4 and 1.6.
+
+ - Introduction protocol for how to generate an introduction cell:
+ v0 specified a nickname for the rendezvous point and assumed the
+ relay would know about it, whereas v2 now specifies IP address,
+ port, and onion key so the relay doesn't need to already recognize
+ it. See 1.8.
+
+1. The Protocol
+
+1.1. Bob configures his local OP.
+
+ We do not specify a format for the OP configuration file. However,
+ OPs SHOULD allow Bob to provide more than one advertised service
+ per OP, and MUST allow Bob to specify one or more virtual ports per
+ service. Bob provides a mapping from each of these virtual ports
+ to a local IP:Port pair.
+
+1.2. Bob's OP generates service descriptors.
+
+ The first time the OP provides an advertised service, it generates
+ a public/private keypair (stored locally).
+
+ Beginning with 0.2.0.10-alpha, Bob's OP encodes "V2" descriptors. The
+ format of a "V2" descriptor is as follows:
+
+ "rendezvous-service-descriptor" descriptor-id NL
+
+ [At start, exactly once]
+
+ Indicates the beginning of the descriptor. "descriptor-id" is a
+ periodically changing identifier of 160 bits formatted as 32 base32
+ chars that is calculated by the hidden service and its clients. If
+ the optional "descriptor-cookie" is used, this "descriptor-id"
+ cannot be computed by anyone else. (Everyone can verify that this
+ "descriptor-id" belongs to the rest of the descriptor, even without
+ knowing the optional "descriptor-cookie", as described below.) The
+ "descriptor-id" is calculated by performing the following operation:
+
+ descriptor-id =
+ H(permanent-id | H(time-period | descriptor-cookie | replica))
+
+ "permanent-id" is the permanent identifier of the hidden service,
+ consisting of 80 bits. It can be calculated by computing the hash value
+ of the public hidden service key and truncating after the first 80 bits:
+
+ permanent-id = H(public-key)[:10]
+
+ "H(time-period | descriptor-cookie | replica)" is the (possibly
+ secret) id part that is
+ necessary to verify that the hidden service is the true originator
+ of this descriptor. It can only be created by the hidden service
+ and its clients, but the "signature" below can only be created by
+ the service.
+
+ "descriptor-cookie" is an optional secret password of 128 bits that
+ is shared between the hidden service provider and its clients.
+
+ "replica" denotes the number of the non-consecutive replica.
+
+ (Each descriptor is replicated on a number of _consecutive_ nodes
+ in the identifier ring by making every storing node responsible
+ for the identifier intervals starting from its 3rd predecessor's
+ ID to its own ID. In addition to that, every service publishes
+ multiple descriptors with different descriptor IDs in order to
+ distribute them to different places on the ring. Therefore,
+ "replica" chooses one of the _non-consecutive_ replicas. -KL)
+
+ The "time-period" changes periodically depending on the global time and
+ as a function of "permanent-id". The current value for "time-period" can
+ be calculated using the following formula:
+
+ time-period = (current-time + permanent-id-byte * 86400 / 256)
+ / 86400
+
+ "current-time" contains the current system time in seconds since
+ 1970-01-01 00:00, e.g. 1188241957. "permanent-id-byte" is the first
+ (unsigned) byte of the permanent identifier (which is in network
+ order), e.g. 143. Adding the product of "permanent-id-byte" and
+ 86400 (seconds per day), divided by 256, prevents "time-period" from
+ changing for all descriptors at the same time of the day. The result
+ of the overall operation is a (network-ordered) 32-bit integer, e.g.
+ 13753 or 0x000035B9 with the example values given above.
+
+ "version" version-number NL
+
+ [Exactly once]
+
+ The version number of this descriptor's format. In this case: 2.
+
+ "permanent-key" NL a public key in PEM format
+
+ [Exactly once]
+
+ The public key of the hidden service which is required to verify the
+ "descriptor-id" and the "signature".
+
+ "secret-id-part" secret-id-part NL
+
+ [Exactly once]
+
+ The result of the following operation as explained above, formatted as
+ 32 base32 chars. Using this secret id part, everyone can verify that
+ the signed descriptor belongs to "descriptor-id".
+
+ secret-id-part = H(time-period | descriptor-cookie | replica)
+
+ "publication-time" YYYY-MM-DD HH:MM:SS NL
+
+ [Exactly once]
+
+ A timestamp when this descriptor has been created.
+
+ "protocol-versions" version-string NL
+
+ [Exactly once]
+
+ A comma-separated list of recognized and permitted version numbers
+ for use in INTRODUCE cells; these versions are described in section
+ 1.8 below.
+
+ "introduction-points" NL encrypted-string
+
+ [At most once]
+
+ A list of introduction points. If the optional "descriptor-cookie" is
+ used, this list is encrypted with AES in CTR mode with a random
+ initialization vector of 128 bits that is written to
+ the beginning of the encrypted string, and the "descriptor-cookie" as
+ secret key of 128 bits length.
+
+ The string containing the introduction point data (either encrypted
+ or not) is encoded in base64, and surrounded with
+ "-----BEGIN MESSAGE-----" and "-----END MESSAGE-----".
+
+ The unencrypted string may begin with:
+
+ ["service-authentication" auth-type NL auth-data ... reserved]
+
+ [At start, any number]
+
+ The service-specific authentication data can be used to perform
+ client authentication. This data is independent of the selected
+ introduction point as opposed to "intro-authentication" below.
+
+ Subsequently, an arbitrary number of introduction point entries may
+ follow, each containing the following data:
+
+ "introduction-point" identifier NL
+
+ [At start, exactly once]
+
+ The identifier of this introduction point: the base-32 encoded
+ hash of this introduction point's identity key.
+
+ "ip-address" ip-address NL
+
+ [Exactly once]
+
+ The IP address of this introduction point.
+
+ "onion-port" port NL
+
+ [Exactly once]
+
+ The TCP port on which the introduction point is listening for
+ incoming onion requests.
+
+ "onion-key" NL a public key in PEM format
+
+ [Exactly once]
+
+ The public key that can be used to encrypt messages to this
+ introduction point.
+
+ "service-key" NL a public key in PEM format
+
+ [Exactly once]
+
+ The public key that can be used to encrypt messages to the hidden
+ service.
+
+ ["intro-authentication" auth-type NL auth-data ... reserved]
+
+ [Any number]
+
+ The introduction-point-specific authentication data can be used
+ to perform client authentication. This data depends on the
+ selected introduction point as opposed to "service-authentication"
+ above.
+
+ (This ends the fields in the encrypted portion of the descriptor.)
+
+ [It's ok for Bob to advertise 0 introduction points. He might want
+ to do that if he previously advertised some introduction points,
+ and now he doesn't have any. -RD]
+
+ "signature" NL signature-string
+
+ [At end, exactly once]
+
+ A signature of all fields above with the private key of the hidden
+ service.
+
+1.2.1. Other descriptor formats we don't use.
+
+ Support for the V0 descriptor format was dropped in 0.2.2.0-alpha-dev:
+
+ KL Key length [2 octets]
+ PK Bob's public key [KL octets]
+ TS A timestamp [4 octets]
+ NI Number of introduction points [2 octets]
+ Ipt A list of NUL-terminated ORs [variable]
+ SIG Signature of above fields [variable]
+
+ KL is the length of PK, in octets.
+ TS is the number of seconds elapsed since Jan 1, 1970.
+
+ The members of Ipt may be either (a) nicknames, or (b) identity key
+ digests, encoded in hex, and prefixed with a '$'.
+
+ The V1 descriptor format was understood and accepted from
+ 0.1.1.5-alpha-cvs to 0.2.0.6-alpha-dev, but no Tors generated it and
+ it was removed:
+
+ V Format byte: set to 255 [1 octet]
+ V Version byte: set to 1 [1 octet]
+ KL Key length [2 octets]
+ PK Bob's public key [KL octets]
+ TS A timestamp [4 octets]
+ PROTO Protocol versions: bitmask [2 octets]
+ NI Number of introduction points [2 octets]
+ For each introduction point: (as in INTRODUCE2 cells)
+ IP Introduction point's address [4 octets]
+ PORT Introduction point's OR port [2 octets]
+ ID Introduction point identity ID [20 octets]
+ KLEN Length of onion key [2 octets]
+ KEY Introduction point onion key [KLEN octets]
+ SIG Signature of above fields [variable]
+
+ A hypothetical "V1" descriptor, that has never been used but might
+ be useful for historical reasons, contains:
+
+ V Format byte: set to 255 [1 octet]
+ V Version byte: set to 1 [1 octet]
+ KL Key length [2 octets]
+ PK Bob's public key [KL octets]
+ TS A timestamp [4 octets]
+ PROTO Rendezvous protocol versions: bitmask [2 octets]
+ NA Number of auth mechanisms accepted [1 octet]
+ For each auth mechanism:
+ AUTHT The auth type that is supported [2 octets]
+ AUTHL Length of auth data [1 octet]
+ AUTHD Auth data [variable]
+ NI Number of introduction points [2 octets]
+ For each introduction point: (as in INTRODUCE2 cells)
+ ATYPE An address type (typically 4) [1 octet]
+ ADDR Introduction point's IP address [4 or 16 octets]
+ PORT Introduction point's OR port [2 octets]
+ AUTHT The auth type that is supported [2 octets]
+ AUTHL Length of auth data [1 octet]
+ AUTHD Auth data [variable]
+ ID Introduction point identity ID [20 octets]
+ KLEN Length of onion key [2 octets]
+ KEY Introduction point onion key [KLEN octets]
+ SIG Signature of above fields [variable]
+
+ AUTHT specifies which authentication/authorization mechanism is
+ required by the hidden service or the introduction point. AUTHD
+ is arbitrary data that can be associated with an auth approach.
+ Currently only AUTHT of [00 00] is supported, with an AUTHL of 0.
+ See section 2 of this document for details on auth mechanisms.
+
+1.3. Bob's OP establishes his introduction points.
+
+ The OP establishes a new introduction circuit to each introduction
+ point. These circuits MUST NOT be used for anything but hidden service
+ introduction. To establish the introduction, Bob sends a
+ RELAY_ESTABLISH_INTRO cell, containing:
+
+ KL Key length [2 octets]
+ PK Introduction public key [KL octets]
+ HS Hash of session info [20 octets]
+ SIG Signature of above information [variable]
+
+ [XXX011, need to add auth information here. -RD]
+
+ To prevent replay attacks, the HS field contains a SHA-1 hash based on the
+ shared secret KH between Bob's OP and the introduction point, as
+ follows:
+ HS = H(KH | "INTRODUCE")
+ That is:
+ HS = H(KH | [49 4E 54 52 4F 44 55 43 45])
+ (KH, as specified in tor-spec.txt, is H(g^xy | [00]) .)
+
+ Upon receiving such a cell, the OR first checks that the signature is
+ correct with the included public key. If so, it checks whether HS is
+ correct given the shared state between Bob's OP and the OR. If either
+ check fails, the OR discards the cell; otherwise, it associates the
+ circuit with Bob's public key, and dissociates any other circuits
+ currently associated with PK. On success, the OR sends Bob a
+ RELAY_INTRO_ESTABLISHED cell with an empty payload.
+
+ Bob's OP does not include its own public key in the RELAY_ESTABLISH_INTRO
+ cell, but the public key of a freshly generated introduction key pair.
+ The OP also includes these fresh public keys in the v2 hidden service
+ descriptor together with the other introduction point information. The
+ reason is that the introduction point does not need to and therefore
+ should not know for which hidden service it works, so as to prevent it
+ from tracking the hidden service's activity.
+
+1.4. Bob's OP advertises his service descriptor(s).
+
+ Bob's OP opens a stream to each directory server's directory port via Tor.
+ (He may re-use old circuits for this.) Over this stream, Bob's OP makes
+ an HTTP 'POST' request, to a URL "/tor/rendezvous/publish" relative to the
+ directory server's root, containing as its body Bob's service descriptor.
+
+ Bob should upload a service descriptor for each version format that
+ is supported in the current Tor network.
+
+ Upon receiving a descriptor, the directory server checks the signature,
+ and discards the descriptor if the signature does not match the enclosed
+ public key. Next, the directory server checks the timestamp. If the
+ timestamp is more than 24 hours in the past or more than 1 hour in the
+ future, or the directory server already has a newer descriptor with the
+ same public key, the server discards the descriptor. Otherwise, the
+ server discards any older descriptors with the same public key and
+ version format, and associates the new descriptor with the public key.
+ The directory server remembers this descriptor for at least 24 hours
+ after its timestamp. At least every 18 hours, Bob's OP uploads a
+ fresh descriptor.
+
+ Bob's OP publishes v2 descriptors to a changing subset of all v2 hidden
+ service directories. Therefore, Bob's OP opens a stream via Tor to each
+ responsible hidden service directory. (He may re-use old circuits
+ for this.) Over this stream, Bob's OP makes an HTTP 'POST' request to a
+ URL "/tor/rendezvous2/publish" relative to the hidden service
+ directory's root, containing as its body Bob's service descriptor.
+
+ At any time, there are 6 hidden service directories responsible for
+ keeping replicas of a descriptor; they consist of 2 sets of 3 hidden
+ service directories with consecutive onion IDs. Bob's OP learns about
+ the complete list of hidden service directories by filtering the
+ consensus status document received from the directory authorities. A
+ hidden service directory is deemed responsible for all descriptor IDs in
+ the interval from its direct predecessor, exclusive, to its own ID,
+ inclusive; it further holds replicas for its 2 predecessors. A
+ participant only trusts its own routing list and never learns about
+ routing information from other parties.
+
+ Bob's OP publishes a new v2 descriptor once an hour or whenever its
+ content changes. V2 descriptors can be found by clients within a given
+ time period of 24 hours, after which they change their ID as described
+ under 1.2. If a published descriptor would be valid for less than 60
+ minutes (= 2 x 30 minutes to allow the server to be 30 minutes behind
+ and the client 30 minutes ahead), Bob's OP publishes the descriptor
+ under the ID of both, the current and the next publication period.
+
+1.5. Alice receives a x.y.z.onion address.
+
+ When Alice receives a pointer to a location-hidden service, it is as a
+ hostname of the form "z.onion" or "y.z.onion" or "x.y.z.onion", where
+ z is a base-32 encoding of a 10-octet hash of Bob's service's public
+ key, computed as follows:
+
+ 1. Let H = H(PK).
+ 2. Let H' = the first 80 bits of H, considering each octet from
+ most significant bit to least significant bit.
+ 3. Generate a 16-character encoding of H', using base32 as defined
+ in RFC 3548.
+
+ (We only use 80 bits instead of the 160 bits from SHA1 because we
+ don't need to worry about arbitrary collisions, and because it will
+ make handling the URLs more convenient.)
+
+ The string "x", if present, is the base-32 encoding of the
+ authentication/authorization required by the introduction point.
+ The string "y", if present, is the base-32 encoding of the
+ authentication/authorization required by the hidden service.
+ Omitting a string is taken to mean auth type [00 00].
+ See section 2 of this document for details on auth mechanisms.
+
+ [Yes, numbers are allowed at the beginning. See RFC 1123. -NM]
+
+1.6. Alice's OP retrieves a service descriptor.
+
+ Similarly to the description in section 1.4, Alice's OP fetches a v2
+ descriptor from a randomly chosen hidden service directory out of the
+ changing subset of 6 nodes. If the request is unsuccessful, Alice retries
+ the other remaining responsible hidden service directories in a random
+ order. Alice relies on Bob to care about a potential clock skew between
+ the two by possibly storing two sets of descriptors (see end of section
+ 1.4).
+
+ Alice's OP opens a stream via Tor to the chosen v2 hidden service
+ directory. (She may re-use old circuits for this.) Over this stream,
+ Alice's OP makes an HTTP 'GET' request for the document
+ "/tor/rendezvous2/<z>", where z is replaced with the encoding of the
+ descriptor ID. The directory replies with a 404 HTTP response if it does
+ not recognize <z>, and otherwise returns Bob's most recently uploaded
+ service descriptor.
+
+ If Alice's OP receives a 404 response, it tries the other directory
+ servers, and only fails the lookup if none recognize the public key hash.
+
+ Upon receiving a service descriptor, Alice verifies with the same process
+ as the directory server uses, described above in section 1.4.
+
+ The directory server gives a 400 response if it cannot understand Alice's
+ request.
+
+ Alice should cache the descriptor locally, but should not use
+ descriptors that are more than 24 hours older than their timestamp.
+ [Caching may make her partitionable, but she fetched it anonymously,
+ and we can't very well *not* cache it. -RD]
+
+1.7. Alice's OP establishes a rendezvous point.
+
+ When Alice requests a connection to a given location-hidden service,
+ and Alice's OP does not have an established circuit to that service,
+ the OP builds a rendezvous circuit. It does this by establishing
+ a circuit to a randomly chosen OR, and sending a
+ RELAY_ESTABLISH_RENDEZVOUS cell to that OR. The body of that cell
+ contains:
+
+ RC Rendezvous cookie [20 octets]
+
+ [XXX011 this looks like an auth mechanism. should we generalize here? -RD]
+
+ The rendezvous cookie is an arbitrary 20-byte value, chosen randomly by
+ Alice's OP.
+
+ Upon receiving a RELAY_ESTABLISH_RENDEZVOUS cell, the OR associates the
+ RC with the circuit that sent it. It replies to Alice with an empty
+ RELAY_RENDEZVOUS_ESTABLISHED cell to indicate success.
+
+ Alice's OP MUST NOT use the circuit which sent the cell for any purpose
+ other than rendezvous with the given location-hidden service.
+
+1.8. Introduction: from Alice's OP to Introduction Point
+
+ Alice builds a separate circuit to one of Bob's chosen introduction
+ points, and sends it a RELAY_INTRODUCE1 cell containing:
+
+ Cleartext
+ PK_ID Identifier for Bob's PK [20 octets]
+ Encrypted to Bob's PK: (in the v0 intro protocol)
+ RP Rendezvous point's nickname [20 octets]
+ RC Rendezvous cookie [20 octets]
+ g^x Diffie-Hellman data, part 1 [128 octets]
+ OR (in the v1 intro protocol)
+ VER Version byte: set to 1. [1 octet]
+ RP Rendezvous point nick or ID [42 octets]
+ RC Rendezvous cookie [20 octets]
+ g^x Diffie-Hellman data, part 1 [128 octets]
+ OR (in the v2 intro protocol)
+ VER Version byte: set to 2. [1 octet]
+ IP Rendezvous point's address [4 octets]
+ PORT Rendezvous point's OR port [2 octets]
+ ID Rendezvous point identity ID [20 octets]
+ KLEN Length of onion key [2 octets]
+ KEY Rendezvous point onion key [KLEN octets]
+ RC Rendezvous cookie [20 octets]
+ g^x Diffie-Hellman data, part 1 [128 octets]
+
+ PK_ID is the hash of Bob's public key. RP is NUL-padded and
+ terminated. In version 0, it must contain a nickname. In version 1,
+ it must contain EITHER a nickname or an identity key digest that is
+ encoded in hex and prefixed with a '$'.
+
+ The hybrid encryption to Bob's PK works just like the hybrid
+ encryption in CREATE cells (see tor-spec). Thus the payload of the
+ version 0 RELAY_INTRODUCE1 cell on the wire will contain
+ 20+42+16+20+20+128=246 bytes, and the version 1 and version 2
+ introduction formats have other sizes.
+
+ Through Tor 0.2.0.6-alpha, clients only generated the v0 introduction
+ format, whereas hidden services have understood and accepted v0,
+ v1, and v2 since 0.1.1.x. As of Tor 0.2.0.7-alpha and 0.1.2.18,
+ clients switched to using the v2 intro format.
+
+ If Alice has downloaded a v2 descriptor, she uses the contained public
+ key ("service-key") instead of Bob's public key to create the
+ RELAY_INTRODUCE1 cell as described above.
+
+1.8.1. Other introduction formats we don't use.
+
+ We briefly speculated about using the following format for the
+ "encrypted to Bob's PK" part of the introduction, but no Tors have
+ ever generated these.
+
+ VER Version byte: set to 3. [1 octet]
+ ATYPE An address type (typically 4) [1 octet]
+ ADDR Rendezvous point's IP address [4 or 16 octets]
+ PORT Rendezvous point's OR port [2 octets]
+ AUTHT The auth type that is supported [2 octets]
+ AUTHL Length of auth data [1 octet]
+ AUTHD Auth data [variable]
+ ID Rendezvous point identity ID [20 octets]
+ KLEN Length of onion key [2 octets]
+ KEY Rendezvous point onion key [KLEN octets]
+ RC Rendezvous cookie [20 octets]
+ g^x Diffie-Hellman data, part 1 [128 octets]
+
+1.9. Introduction: From the Introduction Point to Bob's OP
+
+ If the Introduction Point recognizes PK_ID as a public key which has
+ established a circuit for introductions as in 1.3 above, it sends the body
+ of the cell in a new RELAY_INTRODUCE2 cell down the corresponding circuit.
+ (If the PK_ID is unrecognized, the RELAY_INTRODUCE1 cell is discarded.)
+
+ After sending the RELAY_INTRODUCE2 cell, the OR replies to Alice with an
+ empty RELAY_COMMAND_INTRODUCE_ACK cell. If no RELAY_INTRODUCE2 cell can
+ be sent, the OR replies to Alice with a non-empty cell to indicate an
+ error. (The semantics of the cell body may be determined later; the
+ current implementation sends a single '1' byte on failure.)
+
+ When Bob's OP receives the RELAY_INTRODUCE2 cell, it decrypts it with
+ the private key for the corresponding hidden service, and extracts the
+ rendezvous point's nickname, the rendezvous cookie, and the value of g^x
+ chosen by Alice.
+
+1.10. Rendezvous
+
+ Bob's OP builds a new Tor circuit ending at Alice's chosen rendezvous
+ point, and sends a RELAY_RENDEZVOUS1 cell along this circuit, containing:
+ RC Rendezvous cookie [20 octets]
+ g^y Diffie-Hellman [128 octets]
+ KH Handshake digest [20 octets]
+
+ (Bob's OP MUST NOT use this circuit for any other purpose.)
+
+ If the RP recognizes RC, it relays the rest of the cell down the
+ corresponding circuit in a RELAY_RENDEZVOUS2 cell, containing:
+
+ g^y Diffie-Hellman [128 octets]
+ KH Handshake digest [20 octets]
+
+ (If the RP does not recognize the RC, it discards the cell and
+ tears down the circuit.)
+
+ When Alice's OP receives a RELAY_RENDEZVOUS2 cell on a circuit which
+ has sent a RELAY_ESTABLISH_RENDEZVOUS cell but which has not yet received
+ a reply, it uses g^y and H(g^xy) to complete the handshake as in the Tor
+ circuit extend process: they establish a 60-octet string as
+ K = SHA1(g^xy | [00]) | SHA1(g^xy | [01]) | SHA1(g^xy | [02])
+ and generate
+ KH = K[0..15]
+ Kf = K[16..31]
+ Kb = K[32..47]
+
+ Subsequently, the rendezvous point passes relay cells, unchanged, from
+ each of the two circuits to the other. When Alice's OP sends
+ RELAY cells along the circuit, it first encrypts them with the
+ Kf, then with all of the keys for the ORs in Alice's side of the circuit;
+ and when Alice's OP receives RELAY cells from the circuit, it decrypts
+ them with the keys for the ORs in Alice's side of the circuit, then
+ decrypts them with Kb. Bob's OP does the same, with Kf and Kb
+ interchanged.
+
+1.11. Creating streams
+
+ To open TCP connections to Bob's location-hidden service, Alice's OP sends
+ a RELAY_BEGIN cell along the established circuit, using the special
+ address "", and a chosen port. Bob's OP chooses a destination IP and
+ port, based on the configuration of the service connected to the circuit,
+ and opens a TCP stream. From then on, Bob's OP treats the stream as an
+ ordinary exit connection.
+ [ Except he doesn't include addr in the connected cell or the end
+ cell. -RD]
+
+ Alice MAY send multiple RELAY_BEGIN cells along the circuit, to open
+ multiple streams to Bob. Alice SHOULD NOT send RELAY_BEGIN cells for any
+ other address along her circuit to Bob; if she does, Bob MUST reject them.
+
+2. Authentication and authorization.
+
+Foo.
+
+3. Hidden service directory operation
+
+ This section has been introduced with the v2 hidden service descriptor
+ format. It describes all operations of the v2 hidden service descriptor
+ fetching and propagation mechanism that are required for the protocol
+ described in section 1 to succeed with v2 hidden service descriptors.
+
+3.1. Configuring as hidden service directory
+
+ Every onion router that has its directory port open can decide whether it
+ wants to store and serve hidden service descriptors. An onion router which
+ is configured as such includes the "hidden-service-dir" flag in its router
+ descriptors that it sends to directory authorities.
+
+ The directory authorities include a new flag "HSDir" for routers that
+ decided to provide storage for hidden service descriptors and that
+ have been running for at least 24 hours.
+
+3.2. Accepting publish requests
+
+ Hidden service directory nodes accept publish requests for v2 hidden service
+ descriptors and store them to their local memory. (It is not necessary to
+ make descriptors persistent, because after restarting, the onion router
+ would not be accepted as a storing node anyway, because it has not been
+ running for at least 24 hours.) All requests and replies are formatted as
+ HTTP messages. Requests are initiated via BEGIN_DIR cells directed to
+ the router's directory port, and formatted as HTTP POST requests to the URL
+ "/tor/rendezvous2/publish" relative to the hidden service directory's root,
+ containing as its body a v2 service descriptor.
+
+ A hidden service directory node parses every received descriptor and only
+ stores it when it thinks that it is responsible for storing that descriptor
+ based on its own routing table. See section 1.4 for more information on how
+ to determine responsibility for a certain descriptor ID.
+
+3.3. Processing fetch requests
+
+ Hidden service directory nodes process fetch requests for hidden service
+ descriptors by looking them up in their local memory. (They do not need to
+ determine if they are responsible for the passed ID, because it does no harm
+ if they deliver a descriptor for which they are not (any more) responsible.)
+ All requests and replies are formatted as HTTP messages. Requests are
+ initiated via BEGIN_DIR cells directed to the router's directory port,
+ and formatted as HTTP GET requests for the document "/tor/rendezvous2/<z>",
+ where z is replaced with the encoding of the descriptor ID.
+
diff --git a/orchid/doc/spec/socks-extensions.txt b/orchid/doc/spec/socks-extensions.txt
new file mode 100644
index 00000000..62d86acd
--- /dev/null
+++ b/orchid/doc/spec/socks-extensions.txt
@@ -0,0 +1,78 @@
+Tor's extensions to the SOCKS protocol
+
+1. Overview
+
+ The SOCKS protocol provides a generic interface for TCP proxies. Client
+ software connects to a SOCKS server via TCP, and requests a TCP connection
+ to another address and port. The SOCKS server establishes the connection,
+ and reports success or failure to the client. After the connection has
+ been established, the client application uses the TCP stream as usual.
+
+ Tor supports SOCKS4 as defined in [1], SOCKS4A as defined in [2], and
+ SOCKS5 as defined in [3].
+
+ The stickiest issue for Tor in supporting clients, in practice, is forcing
+ DNS lookups to occur at the OR side: if clients do their own DNS lookup,
+ the DNS server can learn which addresses the client wants to reach.
+ SOCKS4 supports addressing by IPv4 address; SOCKS4A is a kludge on top of
+ SOCKS4 to allow addressing by hostname; SOCKS5 supports IPv4, IPv6, and
+ hostnames.
+
+1.1. Extent of support
+
+ Tor supports the SOCKS4, SOCKS4A, and SOCKS5 standards, except as follows:
+
+ BOTH:
+ - The BIND command is not supported.
+
+ SOCKS4,4A:
+ - SOCKS4 usernames are ignored.
+
+ SOCKS5:
+ - The (SOCKS5) "UDP ASSOCIATE" command is not supported.
+ - IPv6 is not supported in CONNECT commands.
+ - Only the "NO AUTHENTICATION" (SOCKS5) authentication method [00] is
+ supported.
+
+2. Name lookup
+
+ As an extension to SOCKS4A and SOCKS5, Tor implements a new command value,
+ "RESOLVE" [F0]. When Tor receives a "RESOLVE" SOCKS command, it initiates
+ a remote lookup of the hostname provided as the target address in the SOCKS
+ request. The reply is either an error (if the address couldn't be
+ resolved) or a success response. In the case of success, the address is
+ stored in the portion of the SOCKS response reserved for remote IP address.
+
+ (We support RESOLVE in SOCKS4 too, even though it is unnecessary.)
+
+ For SOCKS5 only, we support reverse resolution with a new command value,
+ "RESOLVE_PTR" [F1]. In response to a "RESOLVE_PTR" SOCKS5 command with
+ an IPv4 address as its target, Tor attempts to find the canonical
+ hostname for that IPv4 record, and returns it in the "server bound
+ address" portion of the reply.
+ (This command was not supported before Tor 0.1.2.2-alpha.)
+
+3. Other command extensions.
+
+ Tor 0.1.2.4-alpha added a new command value: "CONNECT_DIR" [F2].
+ In this case, Tor will open an encrypted direct TCP connection to the
+ directory port of the Tor server specified by address:port (the port
+ specified should be the ORPort of the server). It uses a one-hop tunnel
+ and a "BEGIN_DIR" relay cell to accomplish this secure connection.
+
+ The F2 command value was removed in Tor 0.2.0.10-alpha in favor of a
+ new use_begindir flag in edge_connection_t.
+
+4. HTTP-resistance
+
+ Tor checks the first byte of each SOCKS request to see whether it looks
+ more like an HTTP request (that is, it starts with a "G", "H", or "P"). If
+ so, Tor returns a small webpage, telling the user that his/her browser is
+ misconfigured. This is helpful for the many users who mistakenly try to
+ use Tor as an HTTP proxy instead of a SOCKS proxy.
+
+References:
+ [1] http://archive.socks.permeo.com/protocol/socks4.protocol
+ [2] http://archive.socks.permeo.com/protocol/socks4a.protocol
+ [3] SOCKS5: RFC1928
+
diff --git a/orchid/doc/spec/tor-spec.txt b/orchid/doc/spec/tor-spec.txt
new file mode 100644
index 00000000..efa6029f
--- /dev/null
+++ b/orchid/doc/spec/tor-spec.txt
@@ -0,0 +1,992 @@
+
+ Tor Protocol Specification
+
+ Roger Dingledine
+ Nick Mathewson
+
+Note: This document aims to specify Tor as implemented in 0.2.1.x. Future
+versions of Tor may implement improved protocols, and compatibility is not
+guaranteed. Compatibility notes are given for versions 0.1.1.15-rc and
+later; earlier versions are not compatible with the Tor network as of this
+writing.
+
+This specification is not a design document; most design criteria
+are not examined. For more information on why Tor acts as it does,
+see tor-design.pdf.
+
+0. Preliminaries
+
+0.1. Notation and encoding
+
+ PK -- a public key.
+ SK -- a private key.
+ K -- a key for a symmetric cypher.
+
+ a|b -- concatenation of 'a' and 'b'.
+
+ [A0 B1 C2] -- a three-byte sequence, containing the bytes with
+ hexadecimal values A0, B1, and C2, in that order.
+
+ All numeric values are encoded in network (big-endian) order.
+
+ H(m) -- a cryptographic hash of m.
+
+0.2. Security parameters
+
+ Tor uses a stream cipher, a public-key cipher, the Diffie-Hellman
+ protocol, and a hash function.
+
+ KEY_LEN -- the length of the stream cipher's key, in bytes.
+
+ PK_ENC_LEN -- the length of a public-key encrypted message, in bytes.
+ PK_PAD_LEN -- the number of bytes added in padding for public-key
+ encryption, in bytes. (The largest number of bytes that can be encrypted
+ in a single public-key operation is therefore PK_ENC_LEN-PK_PAD_LEN.)
+
+ DH_LEN -- the number of bytes used to represent a member of the
+ Diffie-Hellman group.
+ DH_SEC_LEN -- the number of bytes used in a Diffie-Hellman private key (x).
+
+ HASH_LEN -- the length of the hash function's output, in bytes.
+
+ PAYLOAD_LEN -- The longest allowable cell payload, in bytes. (509)
+
+ CELL_LEN -- The length of a Tor cell, in bytes.
+
+0.3. Ciphers
+
+ For a stream cipher, we use 128-bit AES in counter mode, with an IV of all
+ 0 bytes.
+
+ For a public-key cipher, we use RSA with 1024-bit keys and a fixed
+ exponent of 65537. We use OAEP-MGF1 padding, with SHA-1 as its digest
+ function. We leave the optional "Label" parameter unset. (For OAEP
+ padding, see ftp://ftp.rsasecurity.com/pub/pkcs/pkcs-1/pkcs-1v2-1.pdf)
+
+ For Diffie-Hellman, we use a generator (g) of 2. For the modulus (p), we
+ use the 1024-bit safe prime from rfc2409 section 6.2 whose hex
+ representation is:
+
+ "FFFFFFFFFFFFFFFFC90FDAA22168C234C4C6628B80DC1CD129024E08"
+ "8A67CC74020BBEA63B139B22514A08798E3404DDEF9519B3CD3A431B"
+ "302B0A6DF25F14374FE1356D6D51C245E485B576625E7EC6F44C42E9"
+ "A637ED6B0BFF5CB6F406B7EDEE386BFB5A899FA5AE9F24117C4B1FE6"
+ "49286651ECE65381FFFFFFFFFFFFFFFF"
+
+ As an optimization, implementations SHOULD choose DH private keys (x) of
+ 320 bits. Implementations that do this MUST never use any DH key more
+ than once.
+ [May other implementations reuse their DH keys?? -RD]
+ [Probably not. Conceivably, you could get away with changing DH keys once
+ per second, but there are too many oddball attacks for me to be
+ comfortable that this is safe. -NM]
+
+ For a hash function, we use SHA-1.
+
+ KEY_LEN=16.
+ DH_LEN=128; DH_SEC_LEN=40.
+ PK_ENC_LEN=128; PK_PAD_LEN=42.
+ HASH_LEN=20.
+
+ When we refer to "the hash of a public key", we mean the SHA-1 hash of the
+ DER encoding of an ASN.1 RSA public key (as specified in PKCS.1).
+
+ All "random" values should be generated with a cryptographically strong
+ random number generator, unless otherwise noted.
+
+ The "hybrid encryption" of a byte sequence M with a public key PK is
+ computed as follows:
+ 1. If M is less than PK_ENC_LEN-PK_PAD_LEN, pad and encrypt M with PK.
+ 2. Otherwise, generate a KEY_LEN byte random key K.
+ Let M1 = the first PK_ENC_LEN-PK_PAD_LEN-KEY_LEN bytes of M,
+ and let M2 = the rest of M.
+ Pad and encrypt K|M1 with PK. Encrypt M2 with our stream cipher,
+ using the key K. Concatenate these encrypted values.
+ [XXX Note that this "hybrid encryption" approach does not prevent
+ an attacker from adding or removing bytes to the end of M. It also
+ allows attackers to modify the bytes not covered by the OAEP --
+ see Goldberg's PET2006 paper for details. We will add a MAC to this
+ scheme one day. -RD]
+
+0.4. Other parameter values
+
+ CELL_LEN=512
+
+1. System overview
+
+ Tor is a distributed overlay network designed to anonymize
+ low-latency TCP-based applications such as web browsing, secure shell,
+ and instant messaging. Clients choose a path through the network and
+ build a ``circuit'', in which each node (or ``onion router'' or ``OR'')
+ in the path knows its predecessor and successor, but no other nodes in
+ the circuit. Traffic flowing down the circuit is sent in fixed-size
+ ``cells'', which are unwrapped by a symmetric key at each node (like
+ the layers of an onion) and relayed downstream.
+
+1.1. Keys and names
+
+ Every Tor server has multiple public/private keypairs:
+
+ - A long-term signing-only "Identity key" used to sign documents and
+ certificates, and used to establish server identity.
+ - A medium-term "Onion key" used to decrypt onion skins when accepting
+ circuit extend attempts. (See 5.1.) Old keys MUST be accepted for at
+ least one week after they are no longer advertised. Because of this,
+ servers MUST retain old keys for a while after they're rotated.
+ - A short-term "Connection key" used to negotiate TLS connections.
+ Tor implementations MAY rotate this key as often as they like, and
+ SHOULD rotate this key at least once a day.
+
+ Tor servers are also identified by "nicknames"; these are specified in
+ dir-spec.txt.
+
+2. Connections
+
+ Connections between two Tor servers, or between a client and a server,
+ use TLS/SSLv3 for link authentication and encryption. All
+ implementations MUST support the SSLv3 ciphersuite
+ "SSL_DHE_RSA_WITH_3DES_EDE_CBC_SHA", and SHOULD support the TLS
+ ciphersuite "TLS_DHE_RSA_WITH_AES_128_CBC_SHA" if it is available.
+
+ There are three acceptable ways to perform a TLS handshake when
+ connecting to a Tor server: "certificates up-front", "renegotiation", and
+ "backwards-compatible renegotiation". ("Backwards-compatible
+ renegotiation" is, as the name implies, compatible with both other
+ handshake types.)
+
+ Before Tor 0.2.0.21, only "certificates up-front" was supported. In Tor
+ 0.2.0.21 or later, "backwards-compatible renegotiation" is used.
+
+ In "certificates up-front", the connection initiator always sends a
+ two-certificate chain, consisting of an X.509 certificate using a
+ short-term connection public key and a second, self-signed X.509
+ certificate containing its identity key. The other party sends a similar
+ certificate chain. The initiator's ClientHello MUST NOT include any
+ ciphersuites other than:
+ TLS_DHE_RSA_WITH_AES_256_CBC_SHA
+ TLS_DHE_RSA_WITH_AES_128_CBC_SHA
+ SSL_DHE_RSA_WITH_3DES_EDE_CBC_SHA
+ SSL_DHE_DSS_WITH_3DES_EDE_CBC_SHA
+
+ In "renegotiation", the connection initiator sends no certificates, and
+ the responder sends a single connection certificate. Once the TLS
+ handshake is complete, the initiator renegotiates the handshake, with each
+ party sending a two-certificate chain as in "certificates up-front".
+ The initiator's ClientHello MUST include at least one ciphersuite not in
+ the list above. The responder SHOULD NOT select any ciphersuite besides
+ those in the list above.
+ [The above "should not" is because some of the ciphers that
+ clients list may be fake.]
+
+ In "backwards-compatible renegotiation", the connection initiator's
+ ClientHello MUST include at least one ciphersuite other than those listed
+ above. The connection responder examines the initiator's ciphersuite list
+ to see whether it includes any ciphers other than those included in the
+ list above. If extra ciphers are included, the responder proceeds as in
+ "renegotiation": it sends a single certificate and does not request
+ client certificates. Otherwise (in the case that no extra ciphersuites
+ are included in the ClientHello) the responder proceeds as in
+ "certificates up-front": it requests client certificates, and sends a
+ two-certificate chain. In either case, once the responder has sent its
+ certificate or certificates, the initiator counts them. If two
+ certificates have been sent, it proceeds as in "certificates up-front";
+ otherwise, it proceeds as in "renegotiation".
+
+ All new implementations of the Tor server protocol MUST support
+ "backwards-compatible renegotiation"; clients SHOULD do this too. If
+ this is not possible, new client implementations MUST support both
+ "renegotiation" and "certificates up-front" and use the router's
+ published link protocols list (see dir-spec.txt on the "protocols" entry)
+ to decide which to use.
+
+ In all of the above handshake variants, certificates sent in the clear
+ SHOULD NOT include any strings to identify the host as a Tor server. In
+ the "renegotiation" and "backwards-compatible renegotiation", the
+ initiator SHOULD choose a list of ciphersuites and TLS extensions chosen
+ to mimic one used by a popular web browser.
+
+ Responders MUST NOT select any TLS ciphersuite that lacks ephemeral keys,
+ or whose symmetric keys are less than KEY_LEN bits, or whose digests are
+ less than HASH_LEN bits. Responders SHOULD NOT select any SSLv3
+ ciphersuite other than those listed above.
+
+ Even though the connection protocol is identical, we will think of the
+ initiator as either an onion router (OR) if it is willing to relay
+ traffic for other Tor users, or an onion proxy (OP) if it only handles
+ local requests. Onion proxies SHOULD NOT provide long-term-trackable
+ identifiers in their handshakes.
+
+ In all handshake variants, once all certificates are exchanged, all
+ parties receiving certificates must confirm that the identity key is as
+ expected. (When initiating a connection, the expected identity key is
+ the one given in the directory; when creating a connection because of an
+ EXTEND cell, the expected identity key is the one given in the cell.) If
+ the key is not as expected, the party must close the connection.
+
+ When connecting to an OR, all parties SHOULD reject the connection if that
+ OR has a malformed or missing certificate. When accepting an incoming
+ connection, an OR SHOULD NOT reject incoming connections from parties with
+ malformed or missing certificates. (However, an OR should not believe
+ that an incoming connection is from another OR unless the certificates
+ are present and well-formed.)
+
+ [Before version 0.1.2.8-rc, ORs rejected incoming connections from ORs and
+ OPs alike if their certificates were missing or malformed.]
+
+ Once a TLS connection is established, the two sides send cells
+ (specified below) to one another. Cells are sent serially. All
+ cells are CELL_LEN bytes long. Cells may be sent embedded in TLS
+ records of any size or divided across TLS records, but the framing
+ of TLS records MUST NOT leak information about the type or contents
+ of the cells.
+
+ TLS connections are not permanent. Either side MAY close a connection
+ if there are no circuits running over it and an amount of time
+ (KeepalivePeriod, defaults to 5 minutes) has passed since the last time
+ any traffic was transmitted over the TLS connection. Clients SHOULD
+ also hold a TLS connection with no circuits open, if it is likely that a
+ circuit will be built soon using that connection.
+
+ (As an exception, directory servers may try to stay connected to all of
+ the ORs -- though this will be phased out for the Tor 0.1.2.x release.)
+
+ To avoid being trivially distinguished from servers, client-only Tor
+ instances are encouraged but not required to use a two-certificate chain
+ as well. Clients SHOULD NOT keep using the same certificates when
+ their IP address changes. Clients MAY send no certificates at all.
+
+3. Cell Packet format
+
+ The basic unit of communication for onion routers and onion
+ proxies is a fixed-width "cell".
+
+ On a version 1 connection, each cell contains the following
+ fields:
+
+ CircID [2 bytes]
+ Command [1 byte]
+ Payload (padded with 0 bytes) [PAYLOAD_LEN bytes]
+
+ On a version 2 connection, all cells are as in version 1 connections,
+ except for the initial VERSIONS cell, whose format is:
+
+ Circuit [2 octets; set to 0]
+ Command [1 octet; set to 7 for VERSIONS]
+ Length [2 octets; big-endian integer]
+ Payload [Length bytes]
+
+ The CircID field determines which circuit, if any, the cell is
+ associated with.
+
+ The 'Command' field holds one of the following values:
+ 0 -- PADDING (Padding) (See Sec 7.2)
+ 1 -- CREATE (Create a circuit) (See Sec 5.1)
+ 2 -- CREATED (Acknowledge create) (See Sec 5.1)
+ 3 -- RELAY (End-to-end data) (See Sec 5.5 and 6)
+ 4 -- DESTROY (Stop using a circuit) (See Sec 5.4)
+ 5 -- CREATE_FAST (Create a circuit, no PK) (See Sec 5.1)
+ 6 -- CREATED_FAST (Circuit created, no PK) (See Sec 5.1)
+ 7 -- VERSIONS (Negotiate proto version) (See Sec 4)
+ 8 -- NETINFO (Time and address info) (See Sec 4)
+ 9 -- RELAY_EARLY (End-to-end data; limited) (See sec 5.6)
+
+ The interpretation of 'Payload' depends on the type of the cell.
+ PADDING: Payload is unused.
+ CREATE: Payload contains the handshake challenge.
+ CREATED: Payload contains the handshake response.
+ RELAY: Payload contains the relay header and relay body.
+ DESTROY: Payload contains a reason for closing the circuit.
+ (see 5.4)
+ Upon receiving any other value for the command field, an OR must
+ drop the cell. Since more cell types may be added in the future, ORs
+ should generally not warn when encountering unrecognized commands.
+
+ The payload is padded with 0 bytes.
+
+ PADDING cells are currently used to implement connection keepalive.
+ If there is no other traffic, ORs and OPs send one another a PADDING
+ cell every few minutes.
+
+ CREATE, CREATED, and DESTROY cells are used to manage circuits;
+ see section 5 below.
+
+ RELAY cells are used to send commands and data along a circuit; see
+ section 6 below.
+
+ VERSIONS and NETINFO cells are used to set up connections. See section 4
+ below.
+
+4. Negotiating and initializing connections
+
+4.1. Negotiating versions with VERSIONS cells
+
+ There are multiple instances of the Tor link connection protocol. Any
+ connection negotiated using the "certificates up front" handshake (see
+ section 2 above) is "version 1". In any connection where both parties
+ have behaved as in the "renegotiation" handshake, the link protocol
+ version is 2 or higher.
+
+ To determine the version, in any connection where the "renegotiation"
+ handshake was used (that is, where the server sent only one certificate
+ at first and where the client did not send any certificates until
+ renegotiation), both parties MUST send a VERSIONS cell immediately after
+ the renegotiation is finished, before any other cells are sent. Parties
+ MUST NOT send any other cells on a connection until they have received a
+ VERSIONS cell.
+
+ The payload in a VERSIONS cell is a series of big-endian two-byte
+ integers. Both parties MUST select as the link protocol version the
+ highest number contained both in the VERSIONS cell they sent and in the
+ versions cell they received. If they have no such version in common,
+ they cannot communicate and MUST close the connection.
+
+ Since the version 1 link protocol does not use the "renegotiation"
+ handshake, implementations MUST NOT list version 1 in their VERSIONS
+ cell.
+
+4.2. NETINFO cells
+
+ If version 2 or higher is negotiated, each party sends the other a
+ NETINFO cell. The cell's payload is:
+
+ Timestamp [4 bytes]
+ Other OR's address [variable]
+ Number of addresses [1 byte]
+ This OR's addresses [variable]
+
+ The address format is a type/length/value sequence as given in section
+ 6.4 below. The timestamp is a big-endian unsigned integer number of
+ seconds since the unix epoch.
+
+ Implementations MAY use the timestamp value to help decide if their
+ clocks are skewed. Initiators MAY use "other OR's address" to help
+ learn which address their connections are originating from, if they do
+ not know it. Initiators SHOULD use "this OR's address" to make sure
+ that they have connected to another OR at its canonical address.
+
+ [As of 0.2.0.23-rc, implementations use none of the above values.]
+
+
+5. Circuit management
+
+5.1. CREATE and CREATED cells
+
+ Users set up circuits incrementally, one hop at a time. To create a
+ new circuit, OPs send a CREATE cell to the first node, with the
+ first half of the DH handshake; that node responds with a CREATED
+ cell with the second half of the DH handshake plus the first 20 bytes
+ of derivative key data (see section 5.2). To extend a circuit past
+ the first hop, the OP sends an EXTEND relay cell (see section 5)
+ which instructs the last node in the circuit to send a CREATE cell
+ to extend the circuit.
+
+ The payload for a CREATE cell is an 'onion skin', which consists
+ of the first step of the DH handshake data (also known as g^x).
+ This value is hybrid-encrypted (see 0.3) to Bob's onion key, giving
+ an onion-skin of:
+ PK-encrypted:
+ Padding [PK_PAD_LEN bytes]
+ Symmetric key [KEY_LEN bytes]
+ First part of g^x [PK_ENC_LEN-PK_PAD_LEN-KEY_LEN bytes]
+ Symmetrically encrypted:
+ Second part of g^x [DH_LEN-(PK_ENC_LEN-PK_PAD_LEN-KEY_LEN)
+ bytes]
+
+ The relay payload for an EXTEND relay cell consists of:
+ Address [4 bytes]
+ Port [2 bytes]
+ Onion skin [DH_LEN+KEY_LEN+PK_PAD_LEN bytes]
+ Identity fingerprint [HASH_LEN bytes]
+
+ The port and address field denote the IPV4 address and port of the next
+ onion router in the circuit; the public key hash is the hash of the PKCS#1
+ ASN1 encoding of the next onion router's identity (signing) key. (See 0.3
+ above.) Including this hash allows the extending OR to verify that it is
+ indeed connected to the correct target OR, and prevents certain
+ man-in-the-middle attacks.
+
+ The payload for a CREATED cell, or the relay payload for an
+ EXTENDED cell, contains:
+ DH data (g^y) [DH_LEN bytes]
+ Derivative key data (KH) [HASH_LEN bytes]
+
+ The CircID for a CREATE cell is an arbitrarily chosen 2-byte integer,
+ selected by the node (OP or OR) that sends the CREATE cell. To prevent
+ CircID collisions, when one node sends a CREATE cell to another, it chooses
+ from only one half of the possible values based on the ORs' public
+ identity keys: if the sending node has a lower key, it chooses a CircID with
+ an MSB of 0; otherwise, it chooses a CircID with an MSB of 1.
+
+ (An OP with no public key MAY choose any CircID it wishes, since an OP
+ never needs to process a CREATE cell.)
+
+ Public keys are compared numerically by modulus.
+
+ As usual with DH, x and y MUST be generated randomly.
+
+5.1.1. CREATE_FAST/CREATED_FAST cells
+
+ When initializing the first hop of a circuit, the OP has already
+ established the OR's identity and negotiated a secret key using TLS.
+ Because of this, it is not always necessary for the OP to perform the
+ public key operations to create a circuit. In this case, the
+ OP MAY send a CREATE_FAST cell instead of a CREATE cell for the first
+ hop only. The OR responds with a CREATED_FAST cell, and the circuit is
+ created.
+
+ A CREATE_FAST cell contains:
+
+ Key material (X) [HASH_LEN bytes]
+
+ A CREATED_FAST cell contains:
+
+ Key material (Y) [HASH_LEN bytes]
+ Derivative key data [HASH_LEN bytes] (See 5.2 below)
+
+ The values of X and Y must be generated randomly.
+
+ If an OR sees a circuit created with CREATE_FAST, the OR is sure to be the
+ first hop of a circuit. ORs SHOULD reject attempts to create streams with
+ RELAY_BEGIN exiting the circuit at the first hop: letting Tor be used as a
+ single hop proxy makes exit nodes a more attractive target for compromise.
+
+5.2. Setting circuit keys
+
+ Once the handshake between the OP and an OR is completed, both can
+ now calculate g^xy with ordinary DH. Before computing g^xy, both client
+ and server MUST verify that the received g^x or g^y value is not degenerate;
+ that is, it must be strictly greater than 1 and strictly less than p-1
+ where p is the DH modulus. Implementations MUST NOT complete a handshake
+ with degenerate keys. Implementations MUST NOT discard other "weak"
+ g^x values.
+
+ (Discarding degenerate keys is critical for security; if bad keys
+ are not discarded, an attacker can substitute the server's CREATED
+ cell's g^y with 0 or 1, thus creating a known g^xy and impersonating
+ the server. Discarding other keys may allow attacks to learn bits of
+ the private key.)
+
+ If CREATE or EXTEND is used to extend a circuit, the client and server
+ base their key material on K0=g^xy, represented as a big-endian unsigned
+ integer.
+
+ If CREATE_FAST is used, the client and server base their key material on
+ K0=X|Y.
+
+ From the base key material K0, they compute KEY_LEN*2+HASH_LEN*3 bytes of
+ derivative key data as
+ K = H(K0 | [00]) | H(K0 | [01]) | H(K0 | [02]) | ...
+
+ The first HASH_LEN bytes of K form KH; the next HASH_LEN form the forward
+ digest Df; the next HASH_LEN (bytes 41-60) form the backward digest Db; the
+ next KEY_LEN (bytes 61-76) form Kf; and the final KEY_LEN form Kb. Excess
+ bytes from K are discarded.
+
+ KH is used in the handshake response to demonstrate knowledge of the
+ computed shared key. Df is used to seed the integrity-checking hash
+ for the stream of data going from the OP to the OR, and Db seeds the
+ integrity-checking hash for the data stream from the OR to the OP. Kf
+ is used to encrypt the stream of data going from the OP to the OR, and
+ Kb is used to encrypt the stream of data going from the OR to the OP.
+
+5.3. Creating circuits
+
+ When creating a circuit through the network, the circuit creator
+ (OP) performs the following steps:
+
+ 1. Choose an onion router as an exit node (R_N), such that the onion
+ router's exit policy includes at least one pending stream that
+ needs a circuit (if there are any).
+
+ 2. Choose a chain of (N-1) onion routers
+ (R_1...R_N-1) to constitute the path, such that no router
+ appears in the path twice.
+
+ 3. If not already connected to the first router in the chain,
+ open a new connection to that router.
+
+ 4. Choose a circID not already in use on the connection with the
+ first router in the chain; send a CREATE cell along the
+ connection, to be received by the first onion router.
+
+ 5. Wait until a CREATED cell is received; finish the handshake
+ and extract the forward key Kf_1 and the backward key Kb_1.
+
+ 6. For each subsequent onion router R (R_2 through R_N), extend
+ the circuit to R.
+
+ To extend the circuit by a single onion router R_M, the OP performs
+ these steps:
+
+ 1. Create an onion skin, encrypted to R_M's public onion key.
+
+ 2. Send the onion skin in a relay EXTEND cell along
+ the circuit (see section 5).
+
+ 3. When a relay EXTENDED cell is received, verify KH, and
+ calculate the shared keys. The circuit is now extended.
+
+ When an onion router receives an EXTEND relay cell, it sends a CREATE
+ cell to the next onion router, with the enclosed onion skin as its
+ payload. As special cases, if the extend cell includes a digest of
+ all zeroes, or asks to extend back to the relay that sent the extend
+ cell, the circuit will fail and be torn down. The initiating onion
+ router chooses some circID not yet used on the connection between the
+ two onion routers. (But see section 5.1 above, concerning choosing
+ circIDs based on a comparison of the ORs' public identity keys.)
+
+ When an onion router receives a CREATE cell, if it already has a
+ circuit on the given connection with the given circID, it drops the
+ cell. Otherwise, after receiving the CREATE cell, it completes the
+ DH handshake, and replies with a CREATED cell. Upon receiving a
+ CREATED cell, an onion router packs its payload into an EXTENDED relay
+ cell (see section 5), and sends that cell up the circuit. Upon
+ receiving the EXTENDED relay cell, the OP can retrieve g^y.
+
+ (As an optimization, OR implementations may delay processing onions
+ until a break in traffic allows time to do so without harming
+ network latency too greatly.)
+
+5.3.1. Canonical connections
+
+ It is possible for an attacker to launch a man-in-the-middle attack
+ against a connection by telling OR Alice to extend to OR Bob at some
+ address X controlled by the attacker. The attacker cannot read the
+ encrypted traffic, but the attacker is now in a position to count all
+ bytes sent between Alice and Bob (assuming Alice was not already
+ connected to Bob.)
+
+ To prevent this, when an OR gets an extend request, it SHOULD use an
+ existing OR connection if the ID matches, and ANY of the following
+ conditions hold:
+ - The IP matches the requested IP.
+ - The OR knows that the IP of the connection it's using is canonical
+ because it was listed in the NETINFO cell.
+ - The OR knows that the IP of the connection it's using is canonical
+ because it was listed in the server descriptor.
+
+ [This is not implemented in Tor 0.2.0.23-rc.]
+
+5.4. Tearing down circuits
+
+ Circuits are torn down when an unrecoverable error occurs along
+ the circuit, or when all streams on a circuit are closed and the
+ circuit's intended lifetime is over. Circuits may be torn down
+ either completely or hop-by-hop.
+
+ To tear down a circuit completely, an OR or OP sends a DESTROY
+ cell to the adjacent nodes on that circuit, using the appropriate
+ direction's circID.
+
+ Upon receiving an outgoing DESTROY cell, an OR frees resources
+ associated with the corresponding circuit. If it's not the end of
+ the circuit, it sends a DESTROY cell for that circuit to the next OR
+ in the circuit. If the node is the end of the circuit, then it tears
+ down any associated edge connections (see section 6.1).
+
+ After a DESTROY cell has been processed, an OR ignores all data or
+ destroy cells for the corresponding circuit.
+
+ To tear down part of a circuit, the OP may send a RELAY_TRUNCATE cell
+ signaling a given OR (Stream ID zero). That OR sends a DESTROY
+ cell to the next node in the circuit, and replies to the OP with a
+ RELAY_TRUNCATED cell.
+
+ When an unrecoverable error occurs along one connection in a
+ circuit, the nodes on either side of the connection should, if they
+ are able, act as follows: the node closer to the OP should send a
+ RELAY_TRUNCATED cell towards the OP; the node farther from the OP
+ should send a DESTROY cell down the circuit.
+
+ The payload of a RELAY_TRUNCATED or DESTROY cell contains a single octet,
+ describing why the circuit is being closed or truncated. When sending a
+ TRUNCATED or DESTROY cell because of another TRUNCATED or DESTROY cell,
+ the error code should be propagated. The origin of a circuit always sets
+ this error code to 0, to avoid leaking its version.
+
+ The error codes are:
+ 0 -- NONE (No reason given.)
+ 1 -- PROTOCOL (Tor protocol violation.)
+ 2 -- INTERNAL (Internal error.)
+ 3 -- REQUESTED (A client sent a TRUNCATE command.)
+ 4 -- HIBERNATING (Not currently operating; trying to save bandwidth.)
+ 5 -- RESOURCELIMIT (Out of memory, sockets, or circuit IDs.)
+ 6 -- CONNECTFAILED (Unable to reach server.)
+ 7 -- OR_IDENTITY (Connected to server, but its OR identity was not
+ as expected.)
+ 8 -- OR_CONN_CLOSED (The OR connection that was carrying this circuit
+ died.)
+ 9 -- FINISHED (The circuit has expired for being dirty or old.)
+ 10 -- TIMEOUT (Circuit construction took too long)
+ 11 -- DESTROYED (The circuit was destroyed w/o client TRUNCATE)
+ 12 -- NOSUCHSERVICE (Request for unknown hidden service)
+
+5.5. Routing relay cells
+
+ When an OR receives a RELAY or RELAY_EARLY cell, it checks the cell's
+ circID and determines whether it has a corresponding circuit along that
+ connection. If not, the OR drops the cell.
+
+ Otherwise, if the OR is not at the OP edge of the circuit (that is,
+ either an 'exit node' or a non-edge node), it de/encrypts the payload
+ with the stream cipher, as follows:
+ 'Forward' relay cell (same direction as CREATE):
+ Use Kf as key; decrypt.
+ 'Back' relay cell (opposite direction from CREATE):
+ Use Kb as key; encrypt.
+ Note that in counter mode, decrypt and encrypt are the same operation.
+
+ The OR then decides whether it recognizes the relay cell, by
+ inspecting the payload as described in section 6.1 below. If the OR
+ recognizes the cell, it processes the contents of the relay cell.
+ Otherwise, it passes the decrypted relay cell along the circuit if
+ the circuit continues. If the OR at the end of the circuit
+ encounters an unrecognized relay cell, an error has occurred: the OR
+ sends a DESTROY cell to tear down the circuit.
+
+ When a relay cell arrives at an OP, the OP decrypts the payload
+ with the stream cipher as follows:
+ OP receives data cell:
+ For I=N...1,
+ Decrypt with Kb_I. If the payload is recognized (see
+ section 6.1), then stop and process the payload.
+
+ For more information, see section 6 below.
+
+5.6. Handling relay_early cells
+
+ A RELAY_EARLY cell is designed to limit the length any circuit can reach.
+ When an OR receives a RELAY_EARLY cell, and the next node in the circuit
+ is speaking v2 of the link protocol or later, the OR relays the cell as a
+ RELAY_EARLY cell. Otherwise, it relays it as a RELAY cell.
+
+ If a node ever receives more than 8 RELAY_EARLY cells on a given
+ outbound circuit, it SHOULD close the circuit. (For historical reasons,
+ we don't limit the number of inbound RELAY_EARLY cells; they should
+ be harmless anyway because clients won't accept extend requests. See
+ bug 1038.)
+
+ When speaking v2 of the link protocol or later, clients MUST only send
+ EXTEND cells inside RELAY_EARLY cells. Clients SHOULD send the first ~8
+ RELAY cells that are not targeted at the first hop of any circuit as
+ RELAY_EARLY cells too, in order to partially conceal the circuit length.
+
+ [In a future version of Tor, servers will reject any EXTEND cell not
+ received in a RELAY_EARLY cell. See proposal 110.]
+
+6. Application connections and stream management
+
+6.1. Relay cells
+
+ Within a circuit, the OP and the exit node use the contents of
+ RELAY packets to tunnel end-to-end commands and TCP connections
+ ("Streams") across circuits. End-to-end commands can be initiated
+ by either edge; streams are initiated by the OP.
+
+ The payload of each unencrypted RELAY cell consists of:
+ Relay command [1 byte]
+ 'Recognized' [2 bytes]
+ StreamID [2 bytes]
+ Digest [4 bytes]
+ Length [2 bytes]
+ Data [CELL_LEN-14 bytes]
+
+ The relay commands are:
+ 1 -- RELAY_BEGIN [forward]
+ 2 -- RELAY_DATA [forward or backward]
+ 3 -- RELAY_END [forward or backward]
+ 4 -- RELAY_CONNECTED [backward]
+ 5 -- RELAY_SENDME [forward or backward] [sometimes control]
+ 6 -- RELAY_EXTEND [forward] [control]
+ 7 -- RELAY_EXTENDED [backward] [control]
+ 8 -- RELAY_TRUNCATE [forward] [control]
+ 9 -- RELAY_TRUNCATED [backward] [control]
+ 10 -- RELAY_DROP [forward or backward] [control]
+ 11 -- RELAY_RESOLVE [forward]
+ 12 -- RELAY_RESOLVED [backward]
+ 13 -- RELAY_BEGIN_DIR [forward]
+
+ 32..40 -- Used for hidden services; see rend-spec.txt.
+
+ Commands labelled as "forward" must only be sent by the originator
+ of the circuit. Commands labelled as "backward" must only be sent by
+ other nodes in the circuit back to the originator. Commands marked
+ as either can be sent either by the originator or other nodes.
+
+ The 'recognized' field in any unencrypted relay payload is always set
+ to zero; the 'digest' field is computed as the first four bytes of
+ the running digest of all the bytes that have been destined for
+ this hop of the circuit or originated from this hop of the circuit,
+ seeded from Df or Db respectively (obtained in section 5.2 above),
+ and including this RELAY cell's entire payload (taken with the digest
+ field set to zero).
+
+ When the 'recognized' field of a RELAY cell is zero, and the digest
+ is correct, the cell is considered "recognized" for the purposes of
+ decryption (see section 5.5 above).
+
+ (The digest does not include any bytes from relay cells that do
+ not start or end at this hop of the circuit. That is, it does not
+ include forwarded data. Therefore if 'recognized' is zero but the
+ digest does not match, the running digest at that node should
+ not be updated, and the cell should be forwarded on.)
+
+ All RELAY cells pertaining to the same tunneled stream have the
+ same stream ID. StreamIDs are chosen arbitrarily by the OP. RELAY
+ cells that affect the entire circuit rather than a particular
+ stream use a StreamID of zero -- they are marked in the table above
+ as "[control]" style cells. (Sendme cells are marked as "sometimes
+ control" because they can include a StreamID or not depending
+ on their purpose -- see Section 7.)
+
+ The 'Length' field of a relay cell contains the number of bytes in
+ the relay payload which contain real payload data. The remainder of
+ the payload is padded with NUL bytes.
+
+ If the RELAY cell is recognized but the relay command is not
+ understood, the cell must be dropped and ignored. Its contents
+ still count with respect to the digests, though.
+
+6.2. Opening streams and transferring data
+
+ To open a new anonymized TCP connection, the OP chooses an open
+ circuit to an exit that may be able to connect to the destination
+ address, selects an arbitrary StreamID not yet used on that circuit,
+ and constructs a RELAY_BEGIN cell with a payload encoding the address
+ and port of the destination host. The payload format is:
+
+ ADDRESS | ':' | PORT | [00]
+
+ where ADDRESS can be a DNS hostname, or an IPv4 address in
+ dotted-quad format, or an IPv6 address surrounded by square brackets;
+ and where PORT is a decimal integer between 1 and 65535, inclusive.
+
+ [What is the [00] for? -NM]
+ [It's so the payload is easy to parse out with string funcs -RD]
+
+ Upon receiving this cell, the exit node resolves the address as
+ necessary, and opens a new TCP connection to the target port. If the
+ address cannot be resolved, or a connection can't be established, the
+ exit node replies with a RELAY_END cell. (See 6.4 below.)
+ Otherwise, the exit node replies with a RELAY_CONNECTED cell, whose
+ payload is in one of the following formats:
+ The IPv4 address to which the connection was made [4 octets]
+ A number of seconds (TTL) for which the address may be cached [4 octets]
+ or
+ Four zero-valued octets [4 octets]
+ An address type (6) [1 octet]
+ The IPv6 address to which the connection was made [16 octets]
+ A number of seconds (TTL) for which the address may be cached [4 octets]
+ [XXXX No version of Tor currently generates the IPv6 format.]
+
+ [Tor servers before 0.1.2.0 set the TTL field to a fixed value. Later
+ versions set the TTL to the last value seen from a DNS server, and expire
+ their own cached entries after a fixed interval. This prevents certain
+ attacks.]
+
+ The OP waits for a RELAY_CONNECTED cell before sending any data.
+ Once a connection has been established, the OP and exit node
+ package stream data in RELAY_DATA cells, and upon receiving such
+ cells, echo their contents to the corresponding TCP stream.
+ RELAY_DATA cells sent to unrecognized streams are dropped.
+
+ Relay RELAY_DROP cells are long-range dummies; upon receiving such
+ a cell, the OR or OP must drop it.
+
+6.2.1. Opening a directory stream
+
+ If a Tor server is a directory server, it should respond to a
+ RELAY_BEGIN_DIR cell as if it had received a BEGIN cell requesting a
+ connection to its directory port. RELAY_BEGIN_DIR cells ignore exit
+ policy, since the stream is local to the Tor process.
+
+ If the Tor server is not running a directory service, it should respond
+ with a REASON_NOTDIRECTORY RELAY_END cell.
+
+ Clients MUST generate an all-zero payload for RELAY_BEGIN_DIR cells,
+ and servers MUST ignore the payload.
+
+ [RELAY_BEGIN_DIR was not supported before Tor 0.1.2.2-alpha; clients
+ SHOULD NOT send it to routers running earlier versions of Tor.]
+
+6.3. Closing streams
+
+ When an anonymized TCP connection is closed, or an edge node
+ encounters an error on any stream, it sends a 'RELAY_END' cell along the
+ circuit (if possible) and closes the TCP connection immediately. If
+ an edge node receives a 'RELAY_END' cell for any stream, it closes
+ the TCP connection completely, and sends nothing more along the
+ circuit for that stream.
+
+ The payload of a RELAY_END cell begins with a single 'reason' byte to
+ describe why the stream is closing, plus optional data (depending on
+ the reason.) The values are:
+
+ 1 -- REASON_MISC (catch-all for unlisted reasons)
+ 2 -- REASON_RESOLVEFAILED (couldn't look up hostname)
+ 3 -- REASON_CONNECTREFUSED (remote host refused connection) [*]
+ 4 -- REASON_EXITPOLICY (OR refuses to connect to host or port)
+ 5 -- REASON_DESTROY (Circuit is being destroyed)
+ 6 -- REASON_DONE (Anonymized TCP connection was closed)
+ 7 -- REASON_TIMEOUT (Connection timed out, or OR timed out
+ while connecting)
+ 8 -- (unallocated) [**]
+ 9 -- REASON_HIBERNATING (OR is temporarily hibernating)
+ 10 -- REASON_INTERNAL (Internal error at the OR)
+ 11 -- REASON_RESOURCELIMIT (OR has no resources to fulfill request)
+ 12 -- REASON_CONNRESET (Connection was unexpectedly reset)
+ 13 -- REASON_TORPROTOCOL (Sent when closing connection because of
+ Tor protocol violations.)
+ 14 -- REASON_NOTDIRECTORY (Client sent RELAY_BEGIN_DIR to a
+ non-directory server.)
+
+ (With REASON_EXITPOLICY, the 4-byte IPv4 address or 16-byte IPv6 address
+ forms the optional data, along with a 4-byte TTL; no other reason
+ currently has extra data.)
+
+ OPs and ORs MUST accept reasons not on the above list, since future
+ versions of Tor may provide more fine-grained reasons.
+
+ Tors SHOULD NOT send any reason except REASON_MISC for a stream that they
+ have originated.
+
+ [*] Older versions of Tor also send this reason when connections are
+ reset.
+ [**] Due to a bug in versions of Tor through 0095, error reason 8 must
+ remain allocated until that version is obsolete.
+
+ --- [The rest of this section describes unimplemented functionality.]
+
+ Because TCP connections can be half-open, we follow an equivalent
+ to TCP's FIN/FIN-ACK/ACK protocol to close streams.
+
+ An exit connection can have a TCP stream in one of three states:
+ 'OPEN', 'DONE_PACKAGING', and 'DONE_DELIVERING'. For the purposes
+ of modeling transitions, we treat 'CLOSED' as a fourth state,
+ although connections in this state are not, in fact, tracked by the
+ onion router.
+
+ A stream begins in the 'OPEN' state. Upon receiving a 'FIN' from
+ the corresponding TCP connection, the edge node sends a 'RELAY_FIN'
+ cell along the circuit and changes its state to 'DONE_PACKAGING'.
+ Upon receiving a 'RELAY_FIN' cell, an edge node sends a 'FIN' to
+ the corresponding TCP connection (e.g., by calling
+ shutdown(SHUT_WR)) and changes its state to 'DONE_DELIVERING'.
+
+ When a stream already in 'DONE_DELIVERING' receives a 'FIN', it
+ also sends a 'RELAY_FIN' along the circuit, and changes its state
+ to 'CLOSED'. When a stream already in 'DONE_PACKAGING' receives a
+ 'RELAY_FIN' cell, it sends a 'FIN' and changes its state to
+ 'CLOSED'.
+
+ If an edge node encounters an error on any stream, it sends a
+ 'RELAY_END' cell (if possible) and closes the stream immediately.
+
+6.4. Remote hostname lookup
+
+ To find the address associated with a hostname, the OP sends a
+ RELAY_RESOLVE cell containing the hostname to be resolved with a nul
+ terminating byte. (For a reverse lookup, the OP sends a RELAY_RESOLVE
+ cell containing an in-addr.arpa address.) The OR replies with a
+ RELAY_RESOLVED cell containing a status byte, and any number of
+ answers. Each answer is of the form:
+ Type (1 octet)
+ Length (1 octet)
+ Value (variable-width)
+ TTL (4 octets)
+ "Length" is the length of the Value field.
+ "Type" is one of:
+ 0x00 -- Hostname
+ 0x04 -- IPv4 address
+ 0x06 -- IPv6 address
+ 0xF0 -- Error, transient
+ 0xF1 -- Error, nontransient
+
+ If any answer has a type of 'Error', then no other answer may be given.
+
+ The RELAY_RESOLVE cell must use a nonzero, distinct streamID; the
+ corresponding RELAY_RESOLVED cell must use the same streamID. No stream
+ is actually created by the OR when resolving the name.
+
+7. Flow control
+
+7.1. Link throttling
+
+ Each client or relay should do appropriate bandwidth throttling to
+ keep its user happy.
+
+ Communicants rely on TCP's default flow control to push back when they
+ stop reading.
+
+ The mainline Tor implementation uses token buckets (one for reads,
+ one for writes) for the rate limiting.
+
+ Since 0.2.0.x, Tor has let the user specify an additional pair of
+ token buckets for "relayed" traffic, so people can deploy a Tor relay
+ with strict rate limiting, but also use the same Tor as a client. To
+ avoid partitioning concerns we combine both classes of traffic over a
+ given OR connection, and keep track of the last time we read or wrote
+ a high-priority (non-relayed) cell. If it's been less than N seconds
+ (currently N=30), we give the whole connection high priority, else we
+ give the whole connection low priority. We also give low priority
+ to reads and writes for connections that are serving directory
+ information. See proposal 111 for details.
+
+7.2. Link padding
+
+ Link padding can be created by sending PADDING cells along the
+ connection; relay cells of type "DROP" can be used for long-range
+ padding.
+
+ Currently nodes are not required to do any sort of link padding or
+ dummy traffic. Because strong attacks exist even with link padding,
+ and because link padding greatly increases the bandwidth requirements
+ for running a node, we plan to leave out link padding until this
+ tradeoff is better understood.
+
+7.3. Circuit-level flow control
+
+ To control a circuit's bandwidth usage, each OR keeps track of two
+ 'windows', consisting of how many RELAY_DATA cells it is allowed to
+ originate (package for transmission), and how many RELAY_DATA cells
+ it is willing to consume (receive for local streams). These limits
+ do not apply to cells that the OR receives from one host and relays
+ to another.
+
+ Each 'window' value is initially set to 1000 data cells
+ in each direction (cells that are not data cells do not affect
+ the window). When an OR is willing to deliver more cells, it sends a
+ RELAY_SENDME cell towards the OP, with Stream ID zero. When an OR
+ receives a RELAY_SENDME cell with stream ID zero, it increments its
+ packaging window.
+
+ Each of these cells increments the corresponding window by 100.
+
+ The OP behaves identically, except that it must track a packaging
+ window and a delivery window for every OR in the circuit.
+
+ An OR or OP sends cells to increment its delivery window when the
+ corresponding window value falls under some threshold (900).
+
+ If a packaging window reaches 0, the OR or OP stops reading from
+ TCP connections for all streams on the corresponding circuit, and
+ sends no more RELAY_DATA cells until receiving a RELAY_SENDME cell.
+[this stuff is badly worded; copy in the tor-design section -RD]
+
+7.4. Stream-level flow control
+
+ Edge nodes use RELAY_SENDME cells to implement end-to-end flow
+ control for individual connections across circuits. Similarly to
+ circuit-level flow control, edge nodes begin with a window of cells
+ (500) per stream, and increment the window by a fixed value (50)
+ upon receiving a RELAY_SENDME cell. Edge nodes initiate RELAY_SENDME
+ cells when both a) the window is <= 450, and b) there are fewer than
+ ten cell payloads remaining to be flushed at that edge.
+
+A.1. Differences between spec and implementation
+
+- The current specification requires all ORs to have IPv4 addresses, but
+ allows servers to exit and resolve to IPv6 addresses, and to declare IPv6
+ addresses in their exit policies. The current codebase has no IPv6
+ support at all.
+
diff --git a/orchid/logging.properties b/orchid/logging.properties
new file mode 100644
index 00000000..5a212d03
--- /dev/null
+++ b/orchid/logging.properties
@@ -0,0 +1,8 @@
+handlers=java.util.logging.ConsoleHandler
+.level = INFO
+
+java.util.logging.ConsoleHandler.level = FINEST
+java.util.logging.ConsoleHandler.formatter = java.util.logging.SimpleFormatter
+java.util.logging.SimpleFormatter.format =[%1$tT] %4$s: %5$s%6$s%n
+
+# com.subgraph.orchid.circuits.level=FINE
diff --git a/orchid/opt/xmlrpc/com/subgraph/orchid/xmlrpc/OrchidXmlRpcTransport.java b/orchid/opt/xmlrpc/com/subgraph/orchid/xmlrpc/OrchidXmlRpcTransport.java
new file mode 100644
index 00000000..bce6d6ab
--- /dev/null
+++ b/orchid/opt/xmlrpc/com/subgraph/orchid/xmlrpc/OrchidXmlRpcTransport.java
@@ -0,0 +1,309 @@
+package com.subgraph.orchid.xmlrpc;
+
+import java.io.BufferedInputStream;
+import java.io.BufferedOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+import java.io.UnsupportedEncodingException;
+import java.net.ConnectException;
+import java.net.Socket;
+import java.net.URL;
+import java.net.UnknownHostException;
+import java.security.NoSuchAlgorithmException;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+import java.util.StringTokenizer;
+import java.util.logging.Logger;
+
+import javax.net.SocketFactory;
+import javax.net.ssl.SSLContext;
+import javax.net.ssl.SSLSocket;
+import javax.net.ssl.SSLSocketFactory;
+
+import org.apache.xmlrpc.XmlRpcException;
+import org.apache.xmlrpc.XmlRpcRequest;
+import org.apache.xmlrpc.client.XmlRpcClient;
+import org.apache.xmlrpc.client.XmlRpcClientException;
+import org.apache.xmlrpc.client.XmlRpcHttpClientConfig;
+import org.apache.xmlrpc.client.XmlRpcHttpTransport;
+import org.apache.xmlrpc.client.XmlRpcHttpTransportException;
+import org.apache.xmlrpc.client.XmlRpcLiteHttpTransport;
+import org.apache.xmlrpc.common.XmlRpcStreamRequestConfig;
+import org.apache.xmlrpc.util.HttpUtil;
+import org.apache.xmlrpc.util.LimitedInputStream;
+import org.xml.sax.SAXException;
+
+import com.subgraph.orchid.Tor;
+import com.subgraph.orchid.sockets.AndroidSSLSocketFactory;
+
+public class OrchidXmlRpcTransport extends XmlRpcHttpTransport {
+
+ private final static Logger logger = Logger.getLogger(OrchidXmlRpcTransport.class.getName());
+
+ private final SocketFactory socketFactory;
+ private final SSLContext sslContext;
+
+ private SSLSocketFactory sslSocketFactory;
+
+ /**
+  * Creates a transport for the given XML-RPC client.
+  *
+  * @param pClient client passed to the superclass constructor together with the user agent string
+  * @param socketFactory factory that newSocket uses to open the underlying connection
+  * @param sslContext context used to obtain the SSL socket factory; when null a default factory is used
+  */
+ public OrchidXmlRpcTransport(XmlRpcClient pClient, SocketFactory socketFactory, SSLContext sslContext) {
+     super(pClient, userAgent);
+     this.sslContext = sslContext;
+     this.socketFactory = socketFactory;
+ }
+
+ /**
+  * Returns the SSL socket factory for this transport, building it on the first call
+  * and returning the stored instance afterwards. The method is synchronized so that
+  * concurrent callers do not race on the lazy initialisation.
+  *
+  * @return the SSL socket factory held by this transport
+  */
+ public synchronized SSLSocketFactory getSSLSocketFactory() {
+     if(sslSocketFactory != null) {
+         return sslSocketFactory;
+     }
+     sslSocketFactory = createSSLSocketFactory();
+     return sslSocketFactory;
+ }
+
+ /**
+  * Chooses the SSL socket factory implementation for the current runtime.
+  * When Tor.isAndroidRuntime() reports true the Android-specific factory is used;
+  * otherwise the factory comes from the configured SSLContext, or from
+  * SSLSocketFactory.getDefault() when no context was supplied.
+  *
+  * @return a newly selected SSL socket factory
+  */
+ private SSLSocketFactory createSSLSocketFactory() {
+     if(Tor.isAndroidRuntime()) {
+         return createAndroidSSLSocketFactory();
+     }
+     if(sslContext != null) {
+         return sslContext.getSocketFactory();
+     }
+     return (SSLSocketFactory) SSLSocketFactory.getDefault();
+ }
+
+ /**
+  * Builds the Android-specific SSL socket factory, wrapping the configured
+  * SSLContext when one was supplied and using the no-argument
+  * AndroidSSLSocketFactory constructor otherwise.
+  *
+  * @return a new AndroidSSLSocketFactory
+  * @throws IllegalStateException if the default factory cannot be created because
+  *         the required algorithm is unavailable
+  */
+ private SSLSocketFactory createAndroidSSLSocketFactory() {
+     if(sslContext != null) {
+         return new AndroidSSLSocketFactory(sslContext);
+     }
+     try {
+         return new AndroidSSLSocketFactory();
+     } catch (NoSuchAlgorithmException e) {
+         logger.severe("Failed to create default ssl context");
+         // Report the failure to the caller instead of terminating the whole JVM
+         // from inside a transport helper; the cause is kept for diagnosis.
+         throw new IllegalStateException("Failed to create default ssl context", e);
+     }
+ }
+
+ /**
+  * Opens a connection to the given host and port through the configured socket
+  * factory and, when requested, layers an SSL socket on top of it.
+  *
+  * @param pSSL whether the returned socket must be SSL-wrapped
+  * @param pHostName host to connect to
+  * @param pPort port to connect to
+  * @return the plain socket, or the SSL socket layered over it when pSSL is true
+  * @throws UnknownHostException if the socket factory cannot resolve the host
+  * @throws IOException if the connection or the SSL layering fails
+  */
+ protected Socket newSocket(boolean pSSL, String pHostName, int pPort) throws UnknownHostException, IOException {
+     final Socket plain = socketFactory.createSocket(pHostName, pPort);
+     if(!pSSL) {
+         return plain;
+     }
+     boolean layered = false;
+     try {
+         // autoClose=true: closing the SSL socket also closes the underlying one.
+         final Socket secure = getSSLSocketFactory().createSocket(plain, pHostName, pPort, true);
+         layered = true;
+         return secure;
+     } finally {
+         if(!layered) {
+             // The SSL step failed, so nobody else owns the plain socket; close it here
+             // to avoid leaking the connection.
+             try {
+                 plain.close();
+             } catch (IOException ignored) {
+                 // Best-effort cleanup; the original failure is the one to propagate.
+             }
+         }
+     }
+ }
+
+ private static final String userAgent = USER_AGENT + " (Lite HTTP Transport)"; // user agent string handed to the superclass constructor
+ private boolean ssl; // true when the server URL protocol is "https"; assigned in sendRequest
+ private String hostname; // host part of the server URL; assigned in sendRequest
+ private String host; // value stored under the "Host" header key; derived from hostname and port in sendRequest
+ private int port; // port from the server URL, or a fallback chosen in sendRequest when the URL has none
+ private String uri; // file part of the server URL, "/" when that part is null or empty
+ private Socket socket; // NOTE(review): presumably the connection obtained from newSocket; its use is outside this excerpt, confirm
+ private OutputStream output; // NOTE(review): presumably the request stream of the socket; confirm against the rest of the class
+ private InputStream input; // NOTE(review): presumably the response stream of the socket; confirm against the rest of the class
+ private final Map headers = new HashMap(); // request headers keyed by name; written by sendRequest ("Host") and setRequestHeader
+ private boolean responseGzipCompressed = false; // NOTE(review): only the false default is visible here; presumably tracks a gzip-encoded response, confirm
+ private XmlRpcHttpClientConfig config; // configuration of the request being sent; assigned in sendRequest
+
+
+ /**
+  * Captures the connection parameters (scheme, host, port, path) of the request's
+  * server URL, stores the matching "Host" header and then delegates to the
+  * superclass implementation.
+  *
+  * @param pRequest request to send; its configuration must be an XmlRpcHttpClientConfig
+  * @return the result produced by the superclass sendRequest
+  * @throws XmlRpcException if the superclass fails to perform the request
+  */
+ @Override
+ public Object sendRequest(XmlRpcRequest pRequest) throws XmlRpcException {
+     config = (XmlRpcHttpClientConfig) pRequest.getConfig();
+     final URL url = config.getServerURL();
+     ssl = "https".equals(url.getProtocol());
+     hostname = url.getHost();
+     // A URL without an explicit port reports -1. Fall back to the well-known port of
+     // the scheme so that https requests are not sent to the plain-HTTP port 80.
+     final int defaultPort = ssl ? 443 : 80;
+     final int explicitPort = url.getPort();
+     port = explicitPort < 1 ? defaultPort : explicitPort;
+     final String file = url.getFile();
+     uri = (file == null || "".equals(file)) ? "/" : file;
+     // The Host header carries the port only when it differs from the scheme default.
+     host = port == defaultPort ? hostname : hostname + ":" + port;
+     headers.put("Host", host);
+     return super.sendRequest(pRequest);
+ }
+
+ protected void setRequestHeader(String pHeader, String pValue) {
+ Object value = headers.get(pHeader);
+ if (value == null) {
+ headers.put(pHeader, pValue);
+ } else {
+ List