forked from Qortal/qortal
Rework of repository maintenance and backups
It will now attempt to wait until there are no other active transactions before starting, to avoid deadlocks. A timeout for this process is specified - generally 60 seconds - so that callers can give up or retry if something is holding a transaction open for too long. Right now we will give up in all places except for bootstrap creation, where it will keep retrying until successful.
This commit is contained in:
parent
c4d7335fdd
commit
adeb654248
@ -1,6 +1,7 @@
|
||||
package org.qortal;
|
||||
|
||||
import java.security.Security;
|
||||
import java.util.concurrent.TimeoutException;
|
||||
|
||||
import org.apache.logging.log4j.LogManager;
|
||||
import org.apache.logging.log4j.Logger;
|
||||
@ -57,10 +58,10 @@ public class RepositoryMaintenance {
|
||||
|
||||
LOGGER.info("Starting repository periodic maintenance. This can take a while...");
|
||||
try (final Repository repository = RepositoryManager.getRepository()) {
|
||||
repository.performPeriodicMaintenance();
|
||||
repository.performPeriodicMaintenance(null);
|
||||
|
||||
LOGGER.info("Repository periodic maintenance completed");
|
||||
} catch (DataException e) {
|
||||
} catch (DataException | TimeoutException e) {
|
||||
LOGGER.error("Repository periodic maintenance failed", e);
|
||||
}
|
||||
|
||||
|
@ -22,6 +22,7 @@ import java.time.OffsetDateTime;
|
||||
import java.time.ZoneOffset;
|
||||
import java.util.List;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
import java.util.concurrent.TimeoutException;
|
||||
import java.util.concurrent.locks.ReentrantLock;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
@ -668,14 +669,16 @@ public class AdminResource {
|
||||
blockchainLock.lockInterruptibly();
|
||||
|
||||
try {
|
||||
repository.backup(true, "backup");
|
||||
// Timeout if the database isn't ready for backing up after 60 seconds
|
||||
long timeout = 60 * 1000L;
|
||||
repository.backup(true, "backup", timeout);
|
||||
repository.saveChanges();
|
||||
|
||||
return "true";
|
||||
} finally {
|
||||
blockchainLock.unlock();
|
||||
}
|
||||
} catch (InterruptedException e) {
|
||||
} catch (InterruptedException | TimeoutException e) {
|
||||
// We couldn't lock blockchain to perform backup
|
||||
return "false";
|
||||
} catch (DataException e) {
|
||||
@ -700,13 +703,15 @@ public class AdminResource {
|
||||
blockchainLock.lockInterruptibly();
|
||||
|
||||
try {
|
||||
repository.performPeriodicMaintenance();
|
||||
// Timeout if the database isn't ready to start after 60 seconds
|
||||
long timeout = 60 * 1000L;
|
||||
repository.performPeriodicMaintenance(timeout);
|
||||
} finally {
|
||||
blockchainLock.unlock();
|
||||
}
|
||||
} catch (InterruptedException e) {
|
||||
// No big deal
|
||||
} catch (DataException e) {
|
||||
} catch (DataException | TimeoutException e) {
|
||||
throw ApiExceptionFactory.INSTANCE.createException(request, ApiError.REPOSITORY_ISSUE, e);
|
||||
}
|
||||
}
|
||||
|
@ -14,6 +14,7 @@ import java.security.NoSuchAlgorithmException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
import java.util.concurrent.TimeoutException;
|
||||
|
||||
import org.apache.logging.log4j.LogManager;
|
||||
import org.apache.logging.log4j.Logger;
|
||||
@ -215,8 +216,17 @@ public class AutoUpdate extends Thread {
|
||||
}
|
||||
|
||||
// Give repository a chance to backup in case things go badly wrong (if enabled)
|
||||
if (Settings.getInstance().getRepositoryBackupInterval() > 0)
|
||||
RepositoryManager.backup(true, "backup");
|
||||
if (Settings.getInstance().getRepositoryBackupInterval() > 0) {
|
||||
try {
|
||||
// Timeout if the database isn't ready for backing up after 60 seconds
|
||||
long timeout = 60 * 1000L;
|
||||
RepositoryManager.backup(true, "backup", timeout);
|
||||
|
||||
} catch (TimeoutException e) {
|
||||
LOGGER.info("Attempt to backup repository failed due to timeout: {}", e.getMessage());
|
||||
// Continue with the auto update anyway...
|
||||
}
|
||||
}
|
||||
|
||||
// Call ApplyUpdate to end this process (unlocking current JAR so it can be replaced)
|
||||
String javaHome = System.getProperty("java.home");
|
||||
|
@ -27,6 +27,7 @@ import java.util.Properties;
|
||||
import java.util.Random;
|
||||
import java.util.concurrent.ExecutorService;
|
||||
import java.util.concurrent.Executors;
|
||||
import java.util.concurrent.TimeoutException;
|
||||
import java.util.concurrent.atomic.AtomicLong;
|
||||
import java.util.concurrent.locks.ReentrantLock;
|
||||
import java.util.function.Function;
|
||||
@ -579,7 +580,14 @@ public class Controller extends Thread {
|
||||
Translator.INSTANCE.translate("SysTray", "CREATING_BACKUP_OF_DB_FILES"),
|
||||
MessageType.INFO);
|
||||
|
||||
RepositoryManager.backup(true, "backup");
|
||||
try {
|
||||
// Timeout if the database isn't ready for backing up after 60 seconds
|
||||
long timeout = 60 * 1000L;
|
||||
RepositoryManager.backup(true, "backup", timeout);
|
||||
|
||||
} catch (TimeoutException e) {
|
||||
LOGGER.info("Attempt to backup repository failed due to timeout: {}", e.getMessage());
|
||||
}
|
||||
}
|
||||
|
||||
// Prune stuck/slow/old peers
|
||||
|
@ -19,6 +19,7 @@ import java.io.InputStream;
|
||||
import java.net.URL;
|
||||
import java.nio.file.*;
|
||||
import java.util.List;
|
||||
import java.util.concurrent.TimeoutException;
|
||||
import java.util.concurrent.locks.ReentrantLock;
|
||||
|
||||
import static java.nio.file.StandardCopyOption.REPLACE_EXISTING;
|
||||
@ -227,11 +228,18 @@ public class Bootstrap {
|
||||
|
||||
repository.saveChanges();
|
||||
|
||||
LOGGER.info("Performing repository maintenance...");
|
||||
repository.performPeriodicMaintenance();
|
||||
|
||||
LOGGER.info("Creating bootstrap...");
|
||||
repository.backup(true, "bootstrap");
|
||||
while (!Controller.isStopping()) {
|
||||
try {
|
||||
// Timeout if the database isn't ready for backing up after 10 seconds
|
||||
long timeout = 10 * 1000L;
|
||||
repository.backup(false, "bootstrap", timeout);
|
||||
break;
|
||||
}
|
||||
catch (TimeoutException e) {
|
||||
LOGGER.info("Unable to create bootstrap due to timeout. Retrying...");
|
||||
}
|
||||
}
|
||||
|
||||
LOGGER.info("Moving files to output directory...");
|
||||
inputPath = Paths.get(Settings.getInstance().getRepositoryPath(), "bootstrap");
|
||||
|
@ -1,6 +1,7 @@
|
||||
package org.qortal.repository;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.concurrent.TimeoutException;
|
||||
|
||||
public interface Repository extends AutoCloseable {
|
||||
|
||||
@ -49,9 +50,9 @@ public interface Repository extends AutoCloseable {
|
||||
|
||||
public void setDebug(boolean debugState);
|
||||
|
||||
public void backup(boolean quick, String name) throws DataException;
|
||||
public void backup(boolean quick, String name, Long timeout) throws DataException, TimeoutException;
|
||||
|
||||
public void performPeriodicMaintenance() throws DataException;
|
||||
public void performPeriodicMaintenance(Long timeout) throws DataException, TimeoutException;
|
||||
|
||||
public void exportNodeLocalData() throws DataException;
|
||||
|
||||
|
@ -8,6 +8,7 @@ import org.qortal.repository.hsqldb.HSQLDBRepository;
|
||||
import org.qortal.settings.Settings;
|
||||
|
||||
import java.sql.SQLException;
|
||||
import java.util.concurrent.TimeoutException;
|
||||
|
||||
public abstract class RepositoryManager {
|
||||
private static final Logger LOGGER = LogManager.getLogger(RepositoryManager.class);
|
||||
@ -51,9 +52,9 @@ public abstract class RepositoryManager {
|
||||
repositoryFactory = null;
|
||||
}
|
||||
|
||||
public static void backup(boolean quick, String name) {
|
||||
public static void backup(boolean quick, String name, Long timeout) throws TimeoutException {
|
||||
try (final Repository repository = getRepository()) {
|
||||
repository.backup(quick, name);
|
||||
repository.backup(quick, name, timeout);
|
||||
} catch (DataException e) {
|
||||
// Backup is best-effort so don't complain
|
||||
}
|
||||
|
@ -12,6 +12,7 @@ import org.qortal.settings.Settings;
|
||||
|
||||
import java.sql.ResultSet;
|
||||
import java.sql.SQLException;
|
||||
import java.util.concurrent.TimeoutException;
|
||||
|
||||
/**
|
||||
*
|
||||
@ -315,7 +316,15 @@ public class HSQLDBDatabasePruning {
|
||||
}
|
||||
|
||||
public static void performMaintenance(Repository repository) throws SQLException, DataException {
|
||||
repository.performPeriodicMaintenance();
|
||||
try {
|
||||
// Timeout if the database isn't ready for backing up after 5 minutes
|
||||
// Nothing else should be using the db at this point, so a timeout shouldn't happen
|
||||
long timeout = 5 * 60 * 1000L;
|
||||
repository.performPeriodicMaintenance(timeout);
|
||||
|
||||
} catch (TimeoutException e) {
|
||||
LOGGER.info("Attempt to perform maintenance failed due to timeout: {}", e.getMessage());
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -16,6 +16,7 @@ import java.sql.SQLException;
|
||||
import java.sql.Savepoint;
|
||||
import java.sql.Statement;
|
||||
import java.util.*;
|
||||
import java.util.concurrent.TimeoutException;
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
import java.util.stream.Stream;
|
||||
@ -379,66 +380,92 @@ public class HSQLDBRepository implements Repository {
|
||||
}
|
||||
|
||||
@Override
|
||||
public void backup(boolean quick, String name) throws DataException {
|
||||
if (!quick)
|
||||
// First perform a CHECKPOINT
|
||||
public void backup(boolean quick, String name, Long timeout) throws DataException, TimeoutException {
|
||||
synchronized (CHECKPOINT_LOCK) {
|
||||
|
||||
// We can only perform a CHECKPOINT if no other HSQLDB session is mid-transaction,
|
||||
// otherwise the CHECKPOINT blocks for COMMITs and other threads can't open HSQLDB sessions
|
||||
// due to HSQLDB blocking until CHECKPOINT finishes - i.e. deadlock.
|
||||
// Since we don't want to give up too easily, it's best to wait until the other transaction
|
||||
// count reaches zero, and then continue.
|
||||
this.blockUntilNoOtherTransactions(timeout);
|
||||
|
||||
if (!quick)
|
||||
// First perform a CHECKPOINT
|
||||
try (Statement stmt = this.connection.createStatement()) {
|
||||
LOGGER.info("Performing maintenance - this will take a while...");
|
||||
stmt.execute("CHECKPOINT");
|
||||
stmt.execute("CHECKPOINT DEFRAG");
|
||||
LOGGER.info("Maintenance completed");
|
||||
} catch (SQLException e) {
|
||||
throw new DataException("Unable to prepare repository for backup");
|
||||
}
|
||||
|
||||
// Clean out any previous backup
|
||||
try {
|
||||
String connectionUrl = this.connection.getMetaData().getURL();
|
||||
String dbPathname = getDbPathname(connectionUrl);
|
||||
if (dbPathname == null)
|
||||
throw new DataException("Unable to locate repository for backup?");
|
||||
|
||||
// Doesn't really make sense to backup an in-memory database...
|
||||
if (dbPathname.equals("mem")) {
|
||||
LOGGER.debug("Ignoring request to backup in-memory repository!");
|
||||
return;
|
||||
}
|
||||
|
||||
String backupUrl = buildBackupUrl(dbPathname, name);
|
||||
String backupPathname = getDbPathname(backupUrl);
|
||||
if (backupPathname == null)
|
||||
throw new DataException("Unable to determine location for repository backup?");
|
||||
|
||||
Path backupDirPath = Paths.get(backupPathname).getParent();
|
||||
String backupDirPathname = backupDirPath.toString();
|
||||
|
||||
try (Stream<Path> paths = Files.walk(backupDirPath)) {
|
||||
paths.sorted(Comparator.reverseOrder())
|
||||
.map(Path::toFile)
|
||||
.filter(file -> file.getPath().startsWith(backupDirPathname))
|
||||
.forEach(File::delete);
|
||||
}
|
||||
} catch (NoSuchFileException e) {
|
||||
// Nothing to remove
|
||||
} catch (SQLException | IOException e) {
|
||||
throw new DataException("Unable to remove previous repository backup");
|
||||
}
|
||||
|
||||
// Actually create backup
|
||||
try (Statement stmt = this.connection.createStatement()) {
|
||||
stmt.execute("CHECKPOINT DEFRAG");
|
||||
LOGGER.info("Backing up repository...");
|
||||
stmt.execute(String.format("BACKUP DATABASE TO '%s/' BLOCKING AS FILES", name));
|
||||
LOGGER.info("Backup completed");
|
||||
} catch (SQLException e) {
|
||||
throw new DataException("Unable to prepare repository for backup");
|
||||
throw new DataException("Unable to backup repository");
|
||||
}
|
||||
|
||||
// Clean out any previous backup
|
||||
try {
|
||||
String connectionUrl = this.connection.getMetaData().getURL();
|
||||
String dbPathname = getDbPathname(connectionUrl);
|
||||
if (dbPathname == null)
|
||||
throw new DataException("Unable to locate repository for backup?");
|
||||
|
||||
// Doesn't really make sense to backup an in-memory database...
|
||||
if (dbPathname.equals("mem")) {
|
||||
LOGGER.debug("Ignoring request to backup in-memory repository!");
|
||||
return;
|
||||
}
|
||||
|
||||
String backupUrl = buildBackupUrl(dbPathname, name);
|
||||
String backupPathname = getDbPathname(backupUrl);
|
||||
if (backupPathname == null)
|
||||
throw new DataException("Unable to determine location for repository backup?");
|
||||
|
||||
Path backupDirPath = Paths.get(backupPathname).getParent();
|
||||
String backupDirPathname = backupDirPath.toString();
|
||||
|
||||
try (Stream<Path> paths = Files.walk(backupDirPath)) {
|
||||
paths.sorted(Comparator.reverseOrder())
|
||||
.map(Path::toFile)
|
||||
.filter(file -> file.getPath().startsWith(backupDirPathname))
|
||||
.forEach(File::delete);
|
||||
}
|
||||
} catch (NoSuchFileException e) {
|
||||
// Nothing to remove
|
||||
} catch (SQLException | IOException e) {
|
||||
throw new DataException("Unable to remove previous repository backup");
|
||||
}
|
||||
|
||||
// Actually create backup
|
||||
try (Statement stmt = this.connection.createStatement()) {
|
||||
stmt.execute(String.format("BACKUP DATABASE TO '%s/' BLOCKING AS FILES", name));
|
||||
} catch (SQLException e) {
|
||||
throw new DataException("Unable to backup repository");
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void performPeriodicMaintenance() throws DataException {
|
||||
// Defrag DB - takes a while!
|
||||
try (Statement stmt = this.connection.createStatement()) {
|
||||
LOGGER.info("performing maintenance - this will take a while");
|
||||
stmt.execute("CHECKPOINT");
|
||||
stmt.execute("CHECKPOINT DEFRAG");
|
||||
LOGGER.info("maintenance completed");
|
||||
} catch (SQLException e) {
|
||||
throw new DataException("Unable to defrag repository");
|
||||
public void performPeriodicMaintenance(Long timeout) throws DataException, TimeoutException {
|
||||
synchronized (CHECKPOINT_LOCK) {
|
||||
|
||||
// We can only perform a CHECKPOINT if no other HSQLDB session is mid-transaction,
|
||||
// otherwise the CHECKPOINT blocks for COMMITs and other threads can't open HSQLDB sessions
|
||||
// due to HSQLDB blocking until CHECKPOINT finishes - i.e. deadlock.
|
||||
// Since we don't want to give up too easily, it's best to wait until the other transaction
|
||||
// count reaches zero, and then continue.
|
||||
this.blockUntilNoOtherTransactions(timeout);
|
||||
|
||||
// Defrag DB - takes a while!
|
||||
try (Statement stmt = this.connection.createStatement()) {
|
||||
LOGGER.info("performing maintenance - this will take a while");
|
||||
stmt.execute("CHECKPOINT");
|
||||
stmt.execute("CHECKPOINT DEFRAG");
|
||||
LOGGER.info("maintenance completed");
|
||||
} catch (SQLException e) {
|
||||
throw new DataException("Unable to defrag repository");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -990,4 +1017,51 @@ public class HSQLDBRepository implements Repository {
|
||||
return DEADLOCK_ERROR_CODE.equals(e.getErrorCode());
|
||||
}
|
||||
|
||||
private int otherTransactionsCount() throws DataException {
|
||||
// We can only perform a CHECKPOINT if no other HSQLDB session is mid-transaction,
|
||||
// otherwise the CHECKPOINT blocks for COMMITs and other threads can't open HSQLDB sessions
|
||||
// due to HSQLDB blocking until CHECKPOINT finishes - i.e. deadlock
|
||||
String sql = "SELECT COUNT(*) "
|
||||
+ "FROM Information_schema.system_sessions "
|
||||
+ "WHERE transaction = TRUE AND session_id != ?";
|
||||
try {
|
||||
PreparedStatement pstmt = this.cachePreparedStatement(sql);
|
||||
pstmt.setLong(1, this.sessionId);
|
||||
|
||||
if (!pstmt.execute())
|
||||
throw new DataException("Unable to check repository session status");
|
||||
|
||||
try (ResultSet resultSet = pstmt.getResultSet()) {
|
||||
if (resultSet == null || !resultSet.next())
|
||||
// Failed to even find HSQLDB session info!
|
||||
throw new DataException("No results when checking repository session status");
|
||||
|
||||
int transactionCount = resultSet.getInt(1);
|
||||
|
||||
return transactionCount;
|
||||
}
|
||||
} catch (SQLException e) {
|
||||
throw new DataException("Unable to check repository session status", e);
|
||||
}
|
||||
}
|
||||
|
||||
private void blockUntilNoOtherTransactions(Long timeout) throws DataException, TimeoutException {
|
||||
try {
|
||||
long startTime = System.currentTimeMillis();
|
||||
while (this.otherTransactionsCount() > 0) {
|
||||
// Wait and try again
|
||||
LOGGER.info("Waiting for repository...");
|
||||
Thread.sleep(1000L);
|
||||
|
||||
if (timeout != null) {
|
||||
if (System.currentTimeMillis() - startTime >= timeout) {
|
||||
throw new TimeoutException("Timed out waiting for repository to become available");
|
||||
}
|
||||
}
|
||||
}
|
||||
} catch (InterruptedException e) {
|
||||
throw new DataException("Interrupted before repository became available");
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -26,13 +26,7 @@ import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
import java.util.Random;
|
||||
import java.util.concurrent.CountDownLatch;
|
||||
import java.util.concurrent.ExecutionException;
|
||||
import java.util.concurrent.ExecutorService;
|
||||
import java.util.concurrent.Executors;
|
||||
import java.util.concurrent.Future;
|
||||
import java.util.concurrent.ScheduledExecutorService;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
import java.util.concurrent.*;
|
||||
|
||||
import org.apache.logging.log4j.LogManager;
|
||||
import org.apache.logging.log4j.Logger;
|
||||
@ -445,44 +439,44 @@ public class RepositoryTests extends Common {
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testDefrag() throws DataException {
|
||||
public void testDefrag() throws DataException, TimeoutException {
|
||||
try (final HSQLDBRepository hsqldb = (HSQLDBRepository) RepositoryManager.getRepository()) {
|
||||
|
||||
this.populateWithRandomData(hsqldb);
|
||||
|
||||
hsqldb.performPeriodicMaintenance();
|
||||
hsqldb.performPeriodicMaintenance(10 * 1000L);
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testDefragOnDisk() throws DataException {
|
||||
public void testDefragOnDisk() throws DataException, TimeoutException {
|
||||
Common.useSettingsAndDb(testSettingsFilename, false);
|
||||
|
||||
try (final HSQLDBRepository hsqldb = (HSQLDBRepository) RepositoryManager.getRepository()) {
|
||||
|
||||
this.populateWithRandomData(hsqldb);
|
||||
|
||||
hsqldb.performPeriodicMaintenance();
|
||||
hsqldb.performPeriodicMaintenance(10 * 1000L);
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testMultipleDefrags() throws DataException {
|
||||
public void testMultipleDefrags() throws DataException, TimeoutException {
|
||||
// Mint some more blocks to populate the database
|
||||
try (final HSQLDBRepository hsqldb = (HSQLDBRepository) RepositoryManager.getRepository()) {
|
||||
|
||||
this.populateWithRandomData(hsqldb);
|
||||
|
||||
for (int i = 0; i < 10; i++) {
|
||||
hsqldb.performPeriodicMaintenance();
|
||||
hsqldb.performPeriodicMaintenance(10 * 1000L);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testMultipleDefragsOnDisk() throws DataException {
|
||||
public void testMultipleDefragsOnDisk() throws DataException, TimeoutException {
|
||||
Common.useSettingsAndDb(testSettingsFilename, false);
|
||||
|
||||
// Mint some more blocks to populate the database
|
||||
@ -491,31 +485,31 @@ public class RepositoryTests extends Common {
|
||||
this.populateWithRandomData(hsqldb);
|
||||
|
||||
for (int i = 0; i < 10; i++) {
|
||||
hsqldb.performPeriodicMaintenance();
|
||||
hsqldb.performPeriodicMaintenance(10 * 1000L);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testMultipleDefragsWithDifferentData() throws DataException {
|
||||
public void testMultipleDefragsWithDifferentData() throws DataException, TimeoutException {
|
||||
for (int i=0; i<10; i++) {
|
||||
try (final HSQLDBRepository hsqldb = (HSQLDBRepository) RepositoryManager.getRepository()) {
|
||||
|
||||
this.populateWithRandomData(hsqldb);
|
||||
hsqldb.performPeriodicMaintenance();
|
||||
hsqldb.performPeriodicMaintenance(10 * 1000L);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testMultipleDefragsOnDiskWithDifferentData() throws DataException {
|
||||
public void testMultipleDefragsOnDiskWithDifferentData() throws DataException, TimeoutException {
|
||||
Common.useSettingsAndDb(testSettingsFilename, false);
|
||||
|
||||
for (int i=0; i<10; i++) {
|
||||
try (final HSQLDBRepository hsqldb = (HSQLDBRepository) RepositoryManager.getRepository()) {
|
||||
|
||||
this.populateWithRandomData(hsqldb);
|
||||
hsqldb.performPeriodicMaintenance();
|
||||
hsqldb.performPeriodicMaintenance(10 * 1000L);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user