Skip to content
5 changes: 3 additions & 2 deletions build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -81,8 +81,9 @@ subprojects {
}

dependencies {
implementation group: 'org.slf4j', name: 'slf4j-api', version: '1.7.25'
implementation group: 'org.slf4j', name: 'jcl-over-slf4j', version: '1.7.25'
implementation group: 'org.slf4j', name: 'slf4j-api', version: '1.7.36'
Comment thread
halibobo1205 marked this conversation as resolved.
implementation group: 'org.slf4j', name: 'jcl-over-slf4j', version: '1.7.36'
implementation group: 'org.slf4j', name: 'jul-to-slf4j', version: '1.7.36'
implementation group: 'ch.qos.logback', name: 'logback-classic', version: '1.2.13'
implementation "com.google.code.findbugs:jsr305:3.0.0"
implementation group: 'org.springframework', name: 'spring-context', version: "${springVersion}"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,9 @@
import com.google.common.annotations.VisibleForTesting;
import com.google.common.collect.Sets;
import com.google.common.primitives.Bytes;
import java.io.File;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
Expand All @@ -29,9 +31,14 @@
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.ScheduledFuture;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicReference;
import java.util.concurrent.locks.ReadWriteLock;
import java.util.concurrent.locks.ReentrantReadWriteLock;
import java.util.stream.Collectors;
Expand All @@ -45,6 +52,7 @@
import org.iq80.leveldb.ReadOptions;
import org.iq80.leveldb.WriteBatch;
import org.iq80.leveldb.WriteOptions;
import org.tron.common.es.ExecutorServiceManager;
import org.tron.common.parameter.CommonParameter;
import org.tron.common.storage.WriteOptionsWrapper;
import org.tron.common.storage.metric.DbStat;
Expand All @@ -61,6 +69,13 @@
public class LevelDbDataSourceImpl extends DbStat implements DbSourceInter<byte[]>,
Iterable<Entry<byte[], byte[]>>, Instance<LevelDbDataSourceImpl> {

/** First watchdog WARN fires this many seconds after factory.open() begins. */
private static final long OPEN_WATCHDOG_INITIAL_DELAY_SEC = 60;
/** Subsequent watchdog WARN lines are emitted on this interval. */
private static final long OPEN_WATCHDOG_PERIOD_SEC = 30;
/** Value of {@code Filename.currentFileName()}. */
private static final String LEVELDB_CURRENT_FILE = "CURRENT";

private String dataBaseName;
private DB database;
private volatile boolean alive;
Expand Down Expand Up @@ -121,6 +136,15 @@ private void openDatabase(Options dbOptions) throws IOException {
if (!Files.isSymbolicLink(dbPath.getParent())) {
Files.createDirectories(dbPath.getParent());
}
final long openStartNs = System.nanoTime();
final AtomicReference<String> manifestInfo = new AtomicReference<>();
ScheduledExecutorService watchdog = ExecutorServiceManager
Comment thread
halibobo1205 marked this conversation as resolved.
.newSingleThreadScheduledExecutor("db-open-watchdog-" + dataBaseName, true);
ScheduledFuture<?> watchdogTask = watchdog.scheduleAtFixedRate(
() -> logSlowOpen(dbPath, openStartNs, manifestInfo),
OPEN_WATCHDOG_INITIAL_DELAY_SEC,
OPEN_WATCHDOG_PERIOD_SEC,
TimeUnit.SECONDS);
try {
DbSourceInter.checkOrInitEngine(getEngine(), dbPath.toString(),
TronError.ErrCode.LEVELDB_INIT);
Expand All @@ -139,7 +163,55 @@ private void openDatabase(Options dbOptions) throws IOException {
logger.error("Open Database {} failed", dataBaseName, e);
}
throw new TronError(e, TronError.ErrCode.LEVELDB_INIT);
} finally {
watchdogTask.cancel(false);
watchdog.shutdownNow();
}
}

/**
* Emits a WARN when factory.open() is still blocked — usually because the
* MANIFEST has grown large enough to make replay expensive.
*/
void logSlowOpen(Path dbPath, long startNs, AtomicReference<String> manifestInfoCache) {
try {
long elapsedSec = TimeUnit.NANOSECONDS.toSeconds(System.nanoTime() - startNs);
String manifestInfo = manifestInfoCache.get();
if (manifestInfo == null) {
manifestInfo = resolveManifestInfo(dbPath.toFile());
manifestInfoCache.compareAndSet(null, manifestInfo);
}
logger.warn("DB {} open still in progress after {}s. path={}, {}. "
+ "This startup will complete; to speed up future restarts, run "
+ "`java -jar Toolkit.jar db archive -d {}` before the next startup "
+ "to rebuild the MANIFEST (the tool requires an exclusive DB lock, "
+ "so it cannot run while the node is up).",
dataBaseName, elapsedSec, dbPath, manifestInfo, parentPath);
} catch (Exception e) {
// Purely observational - never let the watchdog disrupt startup.
logger.debug("db-open-watchdog failure for {}: {}", dataBaseName, e.getMessage());
}
}

private static String resolveManifestInfo(File dbDir) {
File currentFile = new File(dbDir, LEVELDB_CURRENT_FILE);
String name = "none";
long size = 0;
if (currentFile.isFile()) {
try {
name = new String(Files.readAllBytes(currentFile.toPath()),
StandardCharsets.UTF_8).trim();
File manifest = new File(dbDir, name);
if (manifest.isFile()) {
size = manifest.length();
}
} catch (IOException ignored) {
// Best-effort — keep defaults. A new DB won't hit the 60s threshold
// anyway, so reporting 0.00 MB here is the expected shape.
}
}
return String.format(Locale.ROOT, "MANIFEST=%s (%.2f MB)", name,
size / 1024.0 / 1024.0);
}

public Path getDbPath() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ protected void statProperty() {
double size = Double.parseDouble(tmp[2]) * 1048576.0;
Metrics.gaugeSet(MetricKeys.Gauge.DB_SST_LEVEL, files, getEngine(), getName(), level);
Metrics.gaugeSet(MetricKeys.Gauge.DB_SIZE_BYTES, size, getEngine(), getName(), level);
logger.info("DB {}, level:{},files:{},size:{} M",
logger.debug("DB {}, level:{},files:{},size:{} M",
getName(), level, files, size / 1048576.0);
});
} catch (Exception e) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
import java.util.List;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Future;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicInteger;
import lombok.Getter;
import lombok.Setter;
Expand Down Expand Up @@ -94,6 +95,8 @@ public class TransactionCapsule implements ProtoCapsule<Transaction> {
.newFixedThreadPool(esName, CommonParameter.getInstance()
.getValidContractProtoThreadNum());
private static final String OWNER_ADDRESS = "ownerAddress_";
// 2-6 ms in general, so we set 50 ms as the threshold for slow signature verification.
private static final long SLOW_SIG_VERIFY_MS = 50;

private Transaction transaction;
@Setter
Expand Down Expand Up @@ -648,6 +651,7 @@ public boolean validatePubSignature(AccountStore accountStore,

byte[] hash = getTransactionId().getBytes();

long startNs = System.nanoTime();
try {
if (!validateSignature(this.transaction, hash, accountStore, dynamicPropertiesStore)) {
isVerified = false;
Expand All @@ -656,12 +660,27 @@ public boolean validatePubSignature(AccountStore accountStore,
} catch (SignatureException | PermissionException | SignatureFormatException e) {
isVerified = false;
throw new ValidateSignatureException(e.getMessage());
} finally {
logSlowSigVerify(startNs);
}
isVerified = true;
}
return true;
}

/**
* INFO-logs when a single signature verification exceeds
* {@link #SLOW_SIG_VERIFY_MS}. Package-private so it can be exercised from
* tests without forcing a real slow crypto path.
*/
void logSlowSigVerify(long startNs) {
long costMs = TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - startNs);
if (costMs > SLOW_SIG_VERIFY_MS) {
Comment thread
halibobo1205 marked this conversation as resolved.
logger.info("slow verify: txId={}, sigCount={}, cost={} ms",
Comment thread
halibobo1205 marked this conversation as resolved.
getTransactionId(), this.transaction.getSignatureCount(), costMs);
}
}

/**
* validate signature
*/
Expand Down
41 changes: 34 additions & 7 deletions common/src/main/java/org/tron/common/log/LogService.java
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@

import ch.qos.logback.classic.LoggerContext;
import ch.qos.logback.classic.joran.JoranConfigurator;
import ch.qos.logback.classic.jul.LevelChangePropagator;
import ch.qos.logback.classic.spi.LoggerContextListener;
import ch.qos.logback.core.util.StatusPrinter;
import java.io.File;
import org.slf4j.LoggerFactory;
Expand All @@ -12,18 +14,43 @@ public class LogService {
public static void load(String path) {
LoggerContext lc = (LoggerContext) LoggerFactory.getILoggerFactory();
try {
File file = new File(path);
if (!file.exists() || !file.isFile() || !file.canRead()) {
return;
// Fail fast rather than silently falling back to the classpath default —
// that legacy behavior misled operators into thinking their custom
// --log-config was active.
if (path != null && !path.isEmpty()) {
File file = new File(path);
if (!file.exists() || !file.isFile() || !file.canRead()) {
throw new IllegalArgumentException(
"logback config is not a readable file: " + path);
}
JoranConfigurator configurator = new JoranConfigurator();
configurator.setContext(lc);
lc.reset();
configurator.doConfigure(file);
}
JoranConfigurator configurator = new JoranConfigurator();
configurator.setContext(lc);
lc.reset();
configurator.doConfigure(file);
// Whether we loaded a custom config via --log-config or kept the classpath
// default, make sure Logback level changes are propagated back to JUL so
// gRPC loggers actually honor the levels declared in the XML. If
// the active config already registered a LevelChangePropagator we leave
// it alone.
ensureLevelChangePropagator(lc);
} catch (Exception e) {
throw new TronError(e, TronError.ErrCode.LOG_LOAD);
} finally {
StatusPrinter.printInCaseOfErrorsOrWarnings(lc);
}
}

private static void ensureLevelChangePropagator(LoggerContext lc) {
for (LoggerContextListener listener : lc.getCopyOfListenerList()) {
if (listener instanceof LevelChangePropagator) {
return;
}
}
LevelChangePropagator propagator = new LevelChangePropagator();
propagator.setContext(lc);
propagator.setResetJUL(true);
propagator.start();
lc.addListener(propagator);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
import java.util.Set;
import lombok.Getter;
import lombok.Setter;
import org.slf4j.bridge.SLF4JBridgeHandler;
import org.tron.common.args.GenesisBlock;
import org.tron.common.config.DbBackupConfig;
import org.tron.common.cron.CronExpression;
Expand All @@ -23,6 +24,19 @@

public class CommonParameter {

// Install the JUL->SLF4J bridge early so that JUL log records emitted during
// static init of grpc classes (or from unit tests that don't invoke
// LogService.load()) still reach Logback.
// removeHandlersForRootLogger() strips JUL's default ConsoleHandler so the
// same record is not emitted twice (once by JUL's own console output and
// once via the bridge to Logback).
static {
SLF4JBridgeHandler.removeHandlersForRootLogger();
if (!SLF4JBridgeHandler.isInstalled()) {
SLF4JBridgeHandler.install();
}
}

protected static CommonParameter PARAMETER = new CommonParameter();

// Runtime chain state: set by VMConfig.initVmHardFork()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@

import ch.qos.logback.core.hook.ShutdownHookBase;
import ch.qos.logback.core.util.Duration;
import org.tron.program.FullNode;

/**
* @author kiven
Expand All @@ -16,11 +15,24 @@ public class TronLogShutdownHook extends ShutdownHookBase {
private static final Duration CHECK_SHUTDOWN_DELAY = Duration.buildByMilliseconds(100);

/**
* The check times before shutdown. default is 60000/100 = 600 times.
* Maximum time to wait for a graceful application shutdown before forcing
* a log flush. Each pool managed by ExecutorServiceManager.shutdownAndAwait-
* Termination() can take up to 120 s in the worst case (60 s await +
* shutdownNow + 60 s await). 180 s is therefore not a hard upper bound, but
* a pragmatic headroom that assumes the many pools in the node shut down
* largely in parallel; in pathological cases trailing shutdown logs may
* still be truncated. In practice 180 s of shutdown output is also enough
* to diagnose most stalls — if a pool is still alive past that window the
* earlier logs already carry the stack/trace context needed to locate the
* offender, so truncating the tail is an acceptable trade-off against
* holding JVM exit open indefinitely.
*/
private final long check_times = 60 * 1000 / CHECK_SHUTDOWN_DELAY.getMilliseconds();
private static final long MAX_WAIT_MS = 3 * 60 * 1000;

// if true, shutdown hook will be executed , for example, 'java -jar FullNode.jar -[v|h]'.
private static final long CHECK_TIMES =
MAX_WAIT_MS / CHECK_SHUTDOWN_DELAY.getMilliseconds();

// if true, shutdown hook will be executed, for example, 'java -jar FullNode.jar -[v|h]'.
public static volatile boolean shutDown = true;

public TronLogShutdownHook() {
Expand All @@ -29,16 +41,19 @@ public TronLogShutdownHook() {
@Override
public void run() {
try {
for (int i = 0; i < check_times; i++) {
for (long i = 0; i < CHECK_TIMES; i++) {
if (shutDown) {
break;
}
addInfo("Sleeping for " + CHECK_SHUTDOWN_DELAY);
if (i % 100 == 0) {
long elapsedSeconds = i * CHECK_SHUTDOWN_DELAY.getMilliseconds() / 1000;
addInfo("Waiting for application shutdown... elapsed=" + elapsedSeconds + "s");
}
Thread.sleep(CHECK_SHUTDOWN_DELAY.getMilliseconds());
}
} catch (InterruptedException e) {
Thread.currentThread().interrupt();
addInfo("TronLogShutdownHook run error :" + e.getMessage());
addInfo("TronLogShutdownHook interrupted: " + e.getMessage());
}
super.stop();
}
Expand Down
20 changes: 13 additions & 7 deletions framework/src/main/resources/logback.xml
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,14 @@

<configuration>

<!-- Propagate logback level changes back to JUL, eliminating the 60x overhead
for disabled log statements when using the jul-to-slf4j bridge.
resetJUL=true clears any explicit JUL logger levels so that the levels
declared below are the effective ones. -->
<contextListener class="ch.qos.logback.classic.jul.LevelChangePropagator">
<resetJUL>true</resetJUL>
</contextListener>

<!-- Be sure to flush latest logs on exit -->
<shutdownHook class="org.tron.core.config.TronLogShutdownHook"/>
<conversionRule conversionWord="m"
Expand Down Expand Up @@ -61,19 +69,17 @@
<!-- (the smaller value -> flush occurs often) -->
<queueSize>100</queueSize>
<includeCallerData>true</includeCallerData>
<!-- Allow up to 5 s to drain the queue on shutdown before giving up -->
<maxFlushTime>5000</maxFlushTime>
<appender-ref ref="FILE"/>
</appender>

<root level="INFO">
<!--<appender-ref ref="CONSOLE"/>-->
<appender-ref ref="ASYNC"/>
</root>


<appender name="METRICS" class="org.tron.common.prometheus.InstrumentedAppender" />

<root level="INFO">
<appender-ref ref="METRICS" />
<!--<appender-ref ref="CONSOLE"/>-->
<appender-ref ref="ASYNC"/>
<appender-ref ref="METRICS"/>
</root>

<logger level="INFO" name="app"/>
Expand Down
Loading
Loading