Approvers

TBD (this is a WIP RFC; approvers will be added once it is ready for review)

...

Current State

Status

title	Under Discussion

Status

colour	Yellow
title	In Progress

Status


colour	Red
title	ABANDONED

Status

colour	Green
title	Completed

Status


colour	Blue
title	Inactive

...

Code Block

title	querying hfile range metadata

import System.nanoTime
/**
 * Evaluates `code` and reports how long it took.
 *
 * @param code by-name expression to time; it is evaluated exactly once, inside this method
 * @param t    start timestamp in nanoseconds; defaults to `nanoTime` read at call time,
 *             i.e. before `code` is evaluated
 * @tparam R   result type of `code`
 * @return a pair of (result of `code`, elapsed time in whole microseconds as an `Int`)
 */
def profile[R](code: => R, t: Long = nanoTime) = {
  val result = code
  // nanoseconds -> microseconds (integer division), then narrowed to Int
  val elapsedMicros = ((nanoTime - t) / 1000).toInt
  (result, elapsedMicros)
}

import org.apache.hadoop.hbase.io.hfile.CacheConfig
import org.apache.hudi.io.storage.HoodieHFileReader
import org.apache.hadoop.fs.Path
import scala.collection.JavaConverters._


// Benchmark: time point lookups (getRecordByKey) for a slice of the keys stored in one HFile.
val cacheConfig = new CacheConfig(spark.sparkContext.hadoopConfiguration)
cacheConfig.setCacheDataInL1(false)

val hfilePath = new Path("hdfs://ns-router-dca1/uber-data/tables/temp/satish/hudi-trips-metadata-hfile1my_metadata_table_hfile/default/53dd4e23-012c-4e3e-91fc-9d5ff6a3bf83-0_0-48-91_20210327201656.hfile")

// Collect the keys with a short-lived reader that is scoped to this block and closed once the
// keys have been copied out. The timed lookups below use a separate, freshly opened reader, so
// the name `reader` is declared only once and the scan reader is not leaked.
val keys = {
  val scanReader = new HoodieHFileReader(spark.sparkContext.hadoopConfiguration, hfilePath, cacheConfig)
  try {
    scanReader.readAllRecords().asScala.map(x => x.getFirst())
  } finally {
    scanReader.close()
  }
}
val keyRange = keys.slice(95000, 100000) // pick desired number of keys

var totalTime: Long = 0 // accumulated lookup time in microseconds (the unit `profile` returns)
var totalRecords: Long = 0 // number of lookups that returned a record
val reader = new HoodieHFileReader(spark.sparkContext.hadoopConfiguration, hfilePath, cacheConfig)
try {
  // foreach rather than map: the loop runs only for its side effects on the two counters.
  keyRange.foreach { k =>
    val (record, time) = profile { reader.getRecordByKey(k) }
    totalTime += time
    if (record.isPresent()) { totalRecords += 1L }
  }
} finally {
  // Release the lookup reader even if a lookup throws.
  reader.close()
}
// microseconds -> milliseconds for the report; this is the value the shell prints.
s"${totalTime / 1000} ms to LOOKUP HFile, #records: $totalRecords"

...

Space shortcuts

Page tree

Versions Compared

Old Version 18

New Version Current

Key

Approvers

Space shortcuts

Page tree

Page History

Versions Compared

Old Version 18

New Version Current

Key

Approvers