Motivation
Table statistics describe the data distribution characteristics of a table. Common statistics include the number of rows, the table size, column statistics, and more. They are very important to a DBMS, especially for generating query plans and optimizing query performance.
...
- distinctCount: The number of distinct values.
- min: The minimum value of the column.
- max: The maximum value of the column.
- nullCount: The number of nulls.
- avgLen: Average column length.
- maxLen: Maximum column length.
Storage
snapshot
Add a new field `statistics` in snapshot to represent the name of the stats file which stores the stats:
...
Code Block |
---|
language | js |
---|
title | stats-87effd5d-48fd-4aab-81fe-4222b847d247-0 |
---|
|
{
"snapshotId": "10",
"mergedRecordCount" : 10,
"mergedRecordSize" : 1000,
"colStats" : {
"orderId" : {
"colId" : 1,
"distinctCount" : 10,
"min" : "1",
"max" : "10",
"nullCount" : 0,
"avgLen" : 8,
"maxLen" : 8
}
}
} |
...
Code Block |
---|
language | java |
---|
title | Stats.class |
---|
|
public class Statistics {
private final long snapshotId;
private final long schemaId;
private final @Nullable Long mergedRecordCount;
private final @Nullable Long mergedRecordSize;
private final @Nullable Map<String, ColStats<?>> colStats;
} |
Code Block |
---|
language | java |
---|
title | ColStats.class |
---|
|
public class ColStats<T> {
private final int colId;
private final @Nullable Long distinctCount;
private @Nullable String serializedMin;
private @Nullable Comparable<T> min;
private @Nullable String serializedMax;
private @Nullable Comparable<T> max;
private final @Nullable Long nullCount;
private final @Nullable Long avgLen;
private final @Nullable Long maxLen;
} |
...
Code Block |
---|
language | java |
---|
title | StatsFileHandler.class |
---|
|
public class StatsFileHandler {
/* Write stats to a stats file, return the written filename. */
public String writeStats(Statistics stats)
/* Read stats of the specified snapshot. */
public Optional<Statistics> readStats(long snapshotId)
/* Delete stats of the specified snapshot. */
public void deleteStats(long snapshotId)
} |
...
Code Block |
---|
language | java |
---|
title | FileStoreCommit.class |
---|
|
public interface FileStoreCommit {
...
/**
* Commit new statistics. The {@link Snapshot.CommitKind} of generated snapshot is {@link
* Snapshot.CommitKind#ANALYZE}.
*/
void commitStatistics(Statistics stats, long commitIdentifier);
...
} |
...