forked from Qortal/qortal
Rework HSQLDB CHECKPOINTing to defer until there are no ongoing SQL transactions, in order to prevent DB deadlocks.
Symptoms of a CHECKPOINT-related DB deadlock: On Controller thread: "Controller" #20 prio=5 os_prio=31 cpu=1577665.56ms elapsed=17666.97s allocated=475G defined_classes=412 tid=0x00007fe99f97b000 nid=0x1644b waiting on condition [0x0000700009a21000] java.lang.Thread.State: WAITING (parking) at jdk.internal.misc.Unsafe.park(java.base@14.0.2/Native Method) - parking to wait for <0x0000000602f2a6f8> (a org.hsqldb.lib.CountUpDownLatch$Sync) [...some more lines...] [this next line is the best indicator: ] at org.qortal.repository.hsqldb.HSQLDBRepository.checkpoint(HSQLDBRepository.java:385) at org.qortal.repository.RepositoryManager.checkpoint(RepositoryManager.java:51) at org.qortal.controller.Controller.run(Controller.java:544) Other threads stuck at: - parking to wait for <0x00000007ff09f0b0> (a org.hsqldb.lib.CountUpDownLatch$Sync) at java.util.concurrent.locks.LockSupport.park(java.base@14.0.2/LockSupport.java:211) at java.util.concurrent.locks.AbstractQueuedSynchronizer.acquire(java.base@14.0.2/AbstractQueuedSynchronizer.java:714) at java.util.concurrent.locks.AbstractQueuedSynchronizer.acquireSharedInterruptibly(java.base@14.0.2/AbstractQueuedSynchronizer.java:1046) at org.hsqldb.lib.CountUpDownLatch.await(Unknown Source) at org.hsqldb.Session.executeCompiledStatement(Unknown Source)
This commit is contained in:
parent
eb23940996
commit
3453f0efaf
@ -628,25 +628,9 @@ public class AdminResource {
|
||||
public String checkpointRepository() {
|
||||
Security.checkApiCallAllowed(request);
|
||||
|
||||
try (final Repository repository = RepositoryManager.getRepository()) {
|
||||
ReentrantLock blockchainLock = Controller.getInstance().getBlockchainLock();
|
||||
RepositoryManager.setRequestedCheckpoint(Boolean.TRUE);
|
||||
|
||||
blockchainLock.lockInterruptibly();
|
||||
|
||||
try {
|
||||
repository.checkpoint(true);
|
||||
repository.saveChanges();
|
||||
|
||||
return "true";
|
||||
} finally {
|
||||
blockchainLock.unlock();
|
||||
}
|
||||
} catch (InterruptedException e) {
|
||||
// We couldn't lock blockchain to perform checkpoint
|
||||
return "false";
|
||||
} catch (DataException e) {
|
||||
throw ApiExceptionFactory.INSTANCE.createException(request, ApiError.REPOSITORY_ISSUE, e);
|
||||
}
|
||||
return "true";
|
||||
}
|
||||
|
||||
@POST
|
||||
|
@ -536,12 +536,7 @@ public class Controller extends Thread {
|
||||
if (now >= repositoryCheckpointTimestamp + repositoryCheckpointInterval) {
|
||||
repositoryCheckpointTimestamp = now + repositoryCheckpointInterval;
|
||||
|
||||
if (Settings.getInstance().getShowCheckpointNotification())
|
||||
SysTray.getInstance().showMessage(Translator.INSTANCE.translate("SysTray", "DB_CHECKPOINT"),
|
||||
Translator.INSTANCE.translate("SysTray", "PERFORMING_DB_CHECKPOINT"),
|
||||
MessageType.INFO);
|
||||
|
||||
RepositoryManager.checkpoint(true);
|
||||
RepositoryManager.setRequestedCheckpoint(Boolean.TRUE);
|
||||
}
|
||||
|
||||
// Give repository a chance to backup (if enabled)
|
||||
|
@ -47,8 +47,6 @@ public interface Repository extends AutoCloseable {
|
||||
|
||||
public void backup(boolean quick) throws DataException;
|
||||
|
||||
public void checkpoint(boolean quick) throws DataException;
|
||||
|
||||
public void performPeriodicMaintenance() throws DataException;
|
||||
|
||||
public void exportNodeLocalData() throws DataException;
|
||||
|
@ -4,6 +4,9 @@ public abstract class RepositoryManager {
|
||||
|
||||
private static RepositoryFactory repositoryFactory = null;
|
||||
|
||||
/** null if no checkpoint requested, TRUE for quick checkpoint, false for slow/full checkpoint. */
|
||||
private static Boolean quickCheckpointRequested = null;
|
||||
|
||||
public static RepositoryFactory getRepositoryFactory() {
|
||||
return repositoryFactory;
|
||||
}
|
||||
@ -46,12 +49,12 @@ public abstract class RepositoryManager {
|
||||
}
|
||||
}
|
||||
|
||||
public static void checkpoint(boolean quick) {
|
||||
try (final Repository repository = getRepository()) {
|
||||
repository.checkpoint(quick);
|
||||
} catch (DataException e) {
|
||||
// Checkpoint is best-effort so don't complain
|
||||
}
|
||||
public static void setRequestedCheckpoint(Boolean quick) {
|
||||
quickCheckpointRequested = quick;
|
||||
}
|
||||
|
||||
public static Boolean getRequestedCheckpoint() {
|
||||
return quickCheckpointRequested;
|
||||
}
|
||||
|
||||
public static void rebuild() throws DataException {
|
||||
|
@ -1,5 +1,6 @@
|
||||
package org.qortal.repository.hsqldb;
|
||||
|
||||
import java.awt.TrayIcon.MessageType;
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.math.BigDecimal;
|
||||
@ -31,6 +32,8 @@ import org.apache.logging.log4j.LogManager;
|
||||
import org.apache.logging.log4j.Logger;
|
||||
import org.qortal.account.PrivateKeyAccount;
|
||||
import org.qortal.crypto.Crypto;
|
||||
import org.qortal.globalization.Translator;
|
||||
import org.qortal.gui.SysTray;
|
||||
import org.qortal.repository.ATRepository;
|
||||
import org.qortal.repository.AccountRepository;
|
||||
import org.qortal.repository.ArbitraryRepository;
|
||||
@ -54,6 +57,8 @@ public class HSQLDBRepository implements Repository {
|
||||
|
||||
private static final Logger LOGGER = LogManager.getLogger(HSQLDBRepository.class);
|
||||
|
||||
private static final Object CHECKPOINT_LOCK = new Object();
|
||||
|
||||
protected Connection connection;
|
||||
protected final Deque<Savepoint> savepoints = new ArrayDeque<>(3);
|
||||
protected boolean debugState = false;
|
||||
@ -103,7 +108,10 @@ public class HSQLDBRepository implements Repository {
|
||||
throw new DataException("Unable to fetch session ID from repository", e);
|
||||
}
|
||||
|
||||
assertEmptyTransaction("connection creation");
|
||||
// synchronize to block new connections if checkpointing in progress
|
||||
synchronized (CHECKPOINT_LOCK) {
|
||||
assertEmptyTransaction("connection creation");
|
||||
}
|
||||
}
|
||||
|
||||
// Getters / setters
|
||||
@ -284,6 +292,9 @@ public class HSQLDBRepository implements Repository {
|
||||
this.sqlStatements = null;
|
||||
this.savepoints.clear();
|
||||
|
||||
// If a checkpoint has been requested, we could perform that now
|
||||
this.maybeCheckpoint();
|
||||
|
||||
// Give connection back to the pool
|
||||
this.connection.close();
|
||||
this.connection = null;
|
||||
@ -292,6 +303,58 @@ public class HSQLDBRepository implements Repository {
|
||||
}
|
||||
}
|
||||
|
||||
private void maybeCheckpoint() throws DataException {
|
||||
// To serialize checkpointing and to block new sessions when checkpointing in progress
|
||||
synchronized (CHECKPOINT_LOCK) {
|
||||
Boolean quickCheckpointRequest = RepositoryManager.getRequestedCheckpoint();
|
||||
if (quickCheckpointRequest == null)
|
||||
return;
|
||||
|
||||
// We can only perform a CHECKPOINT if no other HSQLDB session is mid-transaction,
|
||||
// otherwise the CHECKPOINT blocks for COMMITs and other threads can't open HSQLDB sessions
|
||||
// due to HSQLDB blocking until CHECKPOINT finishes - i.e. deadlock
|
||||
String sql = "SELECT COUNT(*) "
|
||||
+ "FROM Information_schema.system_sessions "
|
||||
+ "WHERE transaction = TRUE";
|
||||
|
||||
try {
|
||||
PreparedStatement pstmt = this.cachePreparedStatement(sql);
|
||||
|
||||
if (!pstmt.execute())
|
||||
throw new DataException("Unable to check repository session status");
|
||||
|
||||
try (ResultSet resultSet = pstmt.getResultSet()) {
|
||||
if (resultSet == null || !resultSet.next())
|
||||
// Failed to even find HSQLDB session info!
|
||||
throw new DataException("No results when checking repository session status");
|
||||
|
||||
int transactionCount = resultSet.getInt(1);
|
||||
|
||||
if (transactionCount > 0)
|
||||
// We can't safely perform CHECKPOINT due to ongoing SQL transactions
|
||||
return;
|
||||
}
|
||||
|
||||
LOGGER.info("Performing repository CHECKPOINT...");
|
||||
|
||||
if (Settings.getInstance().getShowCheckpointNotification())
|
||||
SysTray.getInstance().showMessage(Translator.INSTANCE.translate("SysTray", "DB_CHECKPOINT"),
|
||||
Translator.INSTANCE.translate("SysTray", "PERFORMING_DB_CHECKPOINT"),
|
||||
MessageType.INFO);
|
||||
|
||||
try (Statement stmt = this.connection.createStatement()) {
|
||||
stmt.execute(Boolean.TRUE.equals(quickCheckpointRequest) ? "CHECKPOINT" : "CHECKPOINT DEFRAG");
|
||||
}
|
||||
|
||||
// Completed!
|
||||
LOGGER.info("Repository CHECKPOINT completed!");
|
||||
RepositoryManager.setRequestedCheckpoint(null);
|
||||
} catch (SQLException e) {
|
||||
throw new DataException("Unable to check repository session status", e);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void rebuild() throws DataException {
|
||||
LOGGER.info("Rebuilding repository from scratch");
|
||||
@ -379,15 +442,6 @@ public class HSQLDBRepository implements Repository {
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void checkpoint(boolean quick) throws DataException {
|
||||
try (Statement stmt = this.connection.createStatement()) {
|
||||
stmt.execute(quick ? "CHECKPOINT" : "CHECKPOINT DEFRAG");
|
||||
} catch (SQLException e) {
|
||||
throw new DataException("Unable to perform repository checkpoint");
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void performPeriodicMaintenance() throws DataException {
|
||||
// Defrag DB - takes a while!
|
||||
|
Loading…
Reference in New Issue
Block a user