You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 
 
 

39 lines
1.9 KiB

import org.apache.parquet.hadoop.ParquetFileReader;
import org.apache.parquet.hadoop.metadata.ParquetMetadata;
import org.apache.parquet.hadoop.util.HadoopInputFile;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
/**
 * Command-line utility that opens a single Parquet file and prints a summary of
 * its footer metadata to standard output: the number of row groups, and for each
 * row group its row count, byte size and per-column-chunk offsets and sizes.
 *
 * <p>Usage: {@code ReadParquetMeta <path-to-parquet-file>}
 */
public class ReadParquetMeta {

    /**
     * Reads the footer of the Parquet file named by the first argument and prints it.
     *
     * @param args command-line arguments; {@code args[0]} is the path of the file to
     *             inspect, resolved through a default Hadoop {@link Configuration}
     * @throws Exception if the file cannot be opened or its footer cannot be read
     */
    public static void main(String[] args) throws Exception {
        // Fail with a readable message instead of an ArrayIndexOutOfBoundsException
        // when the tool is started without a path.
        if (args == null || args.length < 1) {
            System.err.println("Usage: ReadParquetMeta <path-to-parquet-file>");
            System.exit(1);
            return;
        }

        Configuration conf = new Configuration();
        Path path = new Path(args[0]);
        HadoopInputFile inputFile = HadoopInputFile.fromPath(path, conf);

        // The reader holds an open stream on the file; try-with-resources closes it.
        try (ParquetFileReader reader = ParquetFileReader.open(inputFile)) {
            ParquetMetadata meta = reader.getFooter();

            System.out.println("=== Parquet File Metadata ===");
            System.out.println("Blocks (row groups): " + meta.getBlocks().size());
            // This is the length reported by the input file itself, not a value
            // taken from the parsed footer, so the label does not mention the footer.
            System.out.println("File size: " + inputFile.getLength());
            System.out.println("");

            // One section per row group, followed by one sub-section per column chunk.
            meta.getBlocks().forEach(rowGroup -> {
                System.out.println("Row Group:");
                System.out.println(" Rows: " + rowGroup.getRowCount());
                System.out.println(" Total byte size: " + rowGroup.getTotalByteSize());
                System.out.println(" Columns: " + rowGroup.getColumns().size());
                System.out.println("");

                rowGroup.getColumns().forEach(column -> {
                    System.out.println(" Column: " + column.getPath());
                    System.out.println(" First data page offset: " + column.getFirstDataPageOffset());
                    System.out.println(" Dictionary page offset: " + column.getDictionaryPageOffset());
                    System.out.println(" Total size: " + column.getTotalSize());
                    System.out.println(" Total uncompressed size: " + column.getTotalUncompressedSize());
                    System.out.println("");
                });
            });
        }
    }
}