...
```scala
import System.nanoTime

// Returns the result of `code` together with its elapsed wall-clock time in microseconds.
def profile[R](code: => R, t: Long = nanoTime): (R, Int) = (code, ((nanoTime - t) / 1000).toInt)

import org.apache.hadoop.fs.Path
import org.apache.hadoop.hbase.io.hfile.CacheConfig
import org.apache.hudi.io.storage.HoodieHFileReader
import scala.collection.JavaConverters._

val cacheConfig = new CacheConfig(spark.sparkContext.hadoopConfiguration)
cacheConfig.setCacheDataInL1(false)

val hfilePath = new Path("hdfs://ns-router-dca1/uber-data/tables/temp/satish/hudi-trips-metadata-hfile1tables/my_metadata_table_hfile/default/53dd4e23-012c-4e3e-91fc-9d5ff6a3bf83-0_0-48-91_20210327201656.hfile")

// First pass: read all record keys out of the HFile.
val reader = new HoodieHFileReader(spark.sparkContext.hadoopConfiguration, hfilePath, cacheConfig)
val keys = reader.readAllRecords().asScala.map(x => x.getFirst())
val keyRange = keys.slice(95000, 100000) // pick desired number of keys

var totalTime: Long = 0
var totalRecords: Long = 0

// Use a fresh reader for the point lookups, so the full scan above does not skew results.
val lookupReader = new HoodieHFileReader(spark.sparkContext.hadoopConfiguration, hfilePath, cacheConfig)
keyRange.foreach { k =>
  val (record, time) = profile { lookupReader.getRecordByKey(k) }
  totalTime += time
  if (record.isPresent()) totalRecords += 1L
}
s"${totalTime / 1000} ms to LOOKUP HFile, #records: $totalRecords"
```
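As a small follow-up (not part of the original snippet), the accumulated totals can also be reported as an average per-key latency; a minimal sketch that reuses only the `totalTime` (microseconds) and `totalRecords` values computed above:

```scala
// Hypothetical follow-up: average point-lookup latency per found record.
// totalTime is in microseconds (see profile above); guard against division by zero.
val avgLookupMicros = if (totalRecords > 0) totalTime / totalRecords else 0L
println(s"avg lookup latency: $avgLookupMicros us across $totalRecords records")
```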
...