...
There should be a cleaner, more direct route. Maybe someone more familiar with VFS can post better code.
Conceptually there is a tar file:
No Format |
---|
archive.tar
+- tardir/
+- content.txt.gz
|
I'd like to end up with an uncompressed file "content.txt". Create this sample archive.tar
file with some (unix) commands like:
No Format |
---|
ls -l > content.txt
gzip content.txt
mkdir tardir
mv content.txt.gz tardir
tar cvf archive.tar tardir
rm -r tardir
|
For this example the sample archive.tar
is located in the /extra/data/tryVfs
directory. You can see that hardcoded in the Java example below. The content.txt
file will be extracted into the same location.
This example uses Maven2. There is a pom.xml
to define the project
...
Content of src/main/java/gov/noaa/eds/tryVfs/MultiStep.java
No Format |
---|
/* * MultiStep.java */ package gov.noaa.eds.tryVfs; import org.apache.commons.vfs.AllFileSelector; import org.apache.commons.vfs.FileName; import org.apache.commons.vfs.FileObject; import org.apache.commons.vfs.FileSystemException; import org.apache.commons.vfs.FileSystemManager; import org.apache.commons.vfs.FileType; import org.apache.commons.vfs.FileTypeSelector; import org.apache.commons.vfs.VFS; import org.apache.commons.vfs.provider.local.LocalFile; /** * Try using VFS to read the content of a compressed (gz) file inside of * a tar file. Extract tar file objects. If they are gzip files, decompress them. * Any directory structure in the tarfile is not being preserved, the contents * are pulled out to the same location regardless of directory hierarchy (for * the purposes of this example, all objects in the tar file have unique names, * so there are no file name conflicts). * * Use a multiple step approach. * 1. extract gzipped file from tar file * 2. decompress gzipped content to a temporary directory * 3. move decompressed content to desired destination * 4. remove temporary directory * 5. remove gzipped file * * There should be a cleaner more direct route, but I haven't discovered it yet. * * @author ktanaka */ public class MultiStep { FileSystemManager fsManager = null; static String extractDirname = "/extra/data/tryVfs"; LocalFile extractDir = null; /** * Extract files from a tar file. If the file extracted is gzipped, * decompress it and remove the gzipped version. * @param args command line arguments are currently not used */ public static void main( String[] args ) { MultiStep msExtract = new MultiStep(); try { msExtract.fsManager = VFS.getManager(); } catch (FileSystemException ex) { throw new RuntimeException("failed to get fsManager from VFS", ex); } try { msExtract.extractDir = (LocalFile) msExtract.fsManager.resolveFile("file://" + extractDirname); if (! 
msExtract.extractDir.exists()) { msExtract.extractDir.createFolder(); } } catch (FileSystemException ex) { throw new RuntimeException("failed to prepare extract directory " + extractDirname, ex); } /* Create a tarFile object */ FileObject tarFile; try { System.out.println("Resolve tar file:"); tarFile = msExtract.fsManager.resolveFile( "tar:/extra/data/tryVfs/archive.tar"); FileName tarFileName = tarFile.getName(); System.out.println(" Path : " + tarFileName.getPath()); System.out.println(" URI : " + tarFileName.getURI()); } catch (Exception ex) { throw new RuntimeException("failed to open tar file ", ex); } /* Work on files inside tarFile */ FileObject[] children; try { children = tarFile.getChildren(); } catch (FileSystemException ex) { throw new RuntimeException("failed to get contents of tarfile ", ex); } for (FileObject f : children) { msExtract.processChild(f); } } // main( String[] args ) private void processChild(FileObject f) { try { if (f.getType() == FileType.FOLDER) { // Recursively process files in this folder FileObject[] children = f.getChildren(); for (FileObject subfile : children) { processChild(subfile); } } else { FileName fname = f.getName(); String extractName = new String(this.extractDir.getName() + "/" + fname.getBaseName()); System.out.println("Extracting " + extractName); LocalFile extractFile = (LocalFile) this.fsManager.resolveFile(extractName); extractFile.copyFrom(f, new AllFileSelector()); // if the file is gzipped, decompress it if (extractFile.getName().getExtension().equals("gz")) { System.out.println("Decompressing " + extractName); String gzName = new String("gz://" + extractFile.getName().getPath()); System.out.println("gzName=" + gzName); FileObject gzFile = this.fsManager.resolveFile(gzName); String fileName = extractFile.getName().getBaseName().replaceAll(".gz$", ""); // The decompressed path we want String decompName = new String(this.extractDir.getName() + "/" + fileName); // A temporary Directory String tmpDirname = new 
String(this.extractDir.getName() + "/" + fileName + ".tmp"); // A temporary file path String tmpFilename = new String(tmpDirname + "/" + fileName); // Some debug lines System.out.println("fileName =" + fileName); System.out.println("decompName =" + decompName); System.out.println("tmpDirname =" + tmpDirname); System.out.println("tmpFilename=" + tmpFilename); // Extracting from gzip file ends up with a directory containing what // we want. LocalFile tmpDir = (LocalFile) this.fsManager.resolveFile(tmpDirname); tmpDir.copyFrom(gzFile, new FileTypeSelector(FileType.FILE)); // Move the uncompressed file to the location desired. LocalFile tmpFile = (LocalFile) this.fsManager.resolveFile(tmpFilename); LocalFile decompFile = (LocalFile) this.fsManager.resolveFile(decompName); tmpFile.moveTo(decompFile); // Delete the temporary directory. tmpDir.delete(new AllFileSelector()); // Delete the gzip file now that we have the uncompressed version. // Note that the plain file FileObject (extractFile) is used // for deleting instead of the gzip FileObject (gzFile). extractFile.delete(new AllFileSelector()); } } } catch (FileSystemException ex) { ex.printStackTrace(); throw new RuntimeException("Error working on tarfile object " + f.getName()); } } // processChild(FileObject f) } |