Versions Compared

Key

  • This line was added.
  • This line was removed.
  • Formatting was changed.
Comment: Described the sample data file archive.tar

...

There should be a cleaner, more direct route. Maybe someone more familiar with VFS can post better code.

Conceptually there is a tar file:

No Format

archive.tar
 +- tardir/
     +- content.txt.gz

I'd like to end up with an uncompressed file "content.txt". Create this sample archive.tar file with some (unix) commands like:

No Format

# Build the sample archive: archive.tar containing tardir/content.txt.gz
ls -l > content.txt
gzip content.txt
mkdir tardir
mv content.txt.gz tardir
tar cvf archive.tar tardir
rm -r tardir

For this example, the sample archive.tar is located in the /extra/data/tryVfs directory. That path is hardcoded in the Java example below. The content.txt file will be extracted into the same directory.

This example uses Maven 2. There is a pom.xml that defines the project.

...

Content of src/main/java/gov/noaa/eds/tryVfs/MultiStep.java

No Format

/*
 * MultiStep.java
 */
package gov.noaa.eds.tryVfs;

import org.apache.commons.vfs.AllFileSelector;
import org.apache.commons.vfs.FileName;
import org.apache.commons.vfs.FileObject;
import org.apache.commons.vfs.FileSystemException;
import org.apache.commons.vfs.FileSystemManager;
import org.apache.commons.vfs.FileType;
import org.apache.commons.vfs.FileTypeSelector;
import org.apache.commons.vfs.VFS;
import org.apache.commons.vfs.provider.local.LocalFile;

/**
 * Try using VFS to read the content of a compressed (gz) file inside of
 * a tar file. Extract tar file objects. If they are gzip files, decompress them.
 * Any directory structure in the tar file is not preserved: the contents
 * are pulled out to the same location regardless of directory hierarchy (for
 * the purposes of this example, all objects in the tar file have unique names,
 * so there are no file name conflicts).
 *
 * <p>Use a multiple step approach:
 * <ol>
 *   <li>extract gzipped file from tar file</li>
 *   <li>decompress gzipped content to a temporary directory</li>
 *   <li>move decompressed content to desired destination</li>
 *   <li>remove temporary directory</li>
 *   <li>remove gzipped file</li>
 * </ol>
 *
 * <p>There should be a cleaner more direct route, but I haven't discovered it yet.
 *
 * @author ktanaka
 */
public class MultiStep {
    /** Suffix stripped from a gzipped file's base name to get the decompressed name. */
    private static final String GZ_SUFFIX = ".gz";

    /** VFS manager used to resolve every file in this example; set in {@link #main}. */
    FileSystemManager fsManager = null;

    /** Directory that holds archive.tar and receives the extracted files. */
    static String extractDirname = "/extra/data/tryVfs";

    /** Resolved VFS object for {@link #extractDirname}; set in {@link #main}. */
    LocalFile extractDir = null;

    /**
     * Extract files from a tar file. If the file extracted is gzipped,
     * decompress it and remove the gzipped version.
     *
     * @param args command line arguments are currently not used
     * @throws RuntimeException if the VFS manager, the extract directory or
     *         the tar file cannot be prepared, or if extracting an entry fails
     */
    public static void main( String[] args ) {
        MultiStep msExtract = new MultiStep();

        try {
            msExtract.fsManager = VFS.getManager();
        } catch (FileSystemException ex) {
            throw new RuntimeException("failed to get fsManager from VFS", ex);
        }

        try {
            msExtract.extractDir = (LocalFile) msExtract.fsManager.resolveFile("file://"
                    + extractDirname);
            if (! msExtract.extractDir.exists()) {
                msExtract.extractDir.createFolder();
            }
        } catch (FileSystemException ex) {
            throw new RuntimeException("failed to prepare extract directory "
                    + extractDirname, ex);
        }

        /* Create a tarFile object */
        FileObject tarFile;
        try {
            System.out.println("Resolve tar file:");
            // Derive the archive location from extractDirname so the two
            // paths cannot drift apart.
            tarFile = msExtract.fsManager.resolveFile(
                    "tar:" + extractDirname + "/archive.tar");

            FileName tarFileName = tarFile.getName();
            System.out.println("  Path     : " + tarFileName.getPath());
            System.out.println("  URI      : " + tarFileName.getURI());
        } catch (FileSystemException ex) {
            throw new RuntimeException("failed to open tar file ", ex);
        }

        /* Work on files inside tarFile */
        FileObject[] children;
        try {
            children = tarFile.getChildren();
        } catch (FileSystemException ex) {
            throw new RuntimeException("failed to get contents of tarfile ", ex);
        }

        for (FileObject f : children) {
            msExtract.processChild(f);
        }

    } // main( String[] args )

    /**
     * Extract one tar entry into the extract directory. Folders are walked
     * recursively; every file is copied out under its base name only, so the
     * directory hierarchy of the archive is flattened. A file whose extension
     * is "gz" is decompressed afterwards and the gzipped copy is removed.
     *
     * @param f the tar entry (file or folder) to process
     * @throws RuntimeException wrapping the underlying
     *         {@link FileSystemException} if any VFS operation fails
     */
    private void processChild(FileObject f) {
        try {
            if (f.getType() == FileType.FOLDER) {
                // Recursively process files in this folder
                for (FileObject subfile : f.getChildren()) {
                    processChild(subfile);
                }
                return;
            }

            String extractName = this.extractDir.getName() + "/"
                    + f.getName().getBaseName();
            System.out.println("Extracting " + extractName);
            LocalFile extractFile = (LocalFile) this.fsManager.resolveFile(extractName);
            extractFile.copyFrom(f, new AllFileSelector());

            // if the file is gzipped, decompress it
            if ("gz".equals(extractFile.getName().getExtension())) {
                System.out.println("Decompressing " + extractName);
                decompressGzip(extractFile);
            }
        } catch (FileSystemException ex) {
            // Keep the cause so the original VFS failure is not lost.
            throw new RuntimeException("Error working on tarfile object " + f.getName(), ex);
        }
    } // processChild(FileObject f)

    /**
     * Decompress an already extracted gzip file next to itself and delete the
     * gzipped original. The gz content is first copied into a temporary
     * "&lt;name&gt;.tmp" directory, then the decompressed file is moved to its
     * final location and the temporary directory is removed.
     *
     * @param extractFile the local gzip file (base name ending in ".gz")
     * @throws FileSystemException if any VFS resolve, copy, move or delete fails
     */
    private void decompressGzip(LocalFile extractFile) throws FileSystemException {
        String gzName = "gz://" + extractFile.getName().getPath();
        System.out.println("gzName=" + gzName);
        FileObject gzFile = this.fsManager.resolveFile(gzName);

        // Strip the literal ".gz" suffix (no regex, so '.' cannot match
        // an arbitrary character).
        String baseName = extractFile.getName().getBaseName();
        String fileName = baseName.endsWith(GZ_SUFFIX)
                ? baseName.substring(0, baseName.length() - GZ_SUFFIX.length())
                : baseName;

        // The decompressed path we want
        String decompName = this.extractDir.getName() + "/" + fileName;

        // A temporary Directory
        String tmpDirname = this.extractDir.getName() + "/" + fileName + ".tmp";

        // A temporary file path
        String tmpFilename = tmpDirname + "/" + fileName;

        // Some debug lines
        System.out.println("fileName   =" + fileName);
        System.out.println("decompName =" + decompName);
        System.out.println("tmpDirname =" + tmpDirname);
        System.out.println("tmpFilename=" + tmpFilename);

        // Extracting from gzip file ends up with a directory containing what
        // we want.
        LocalFile tmpDir = (LocalFile) this.fsManager.resolveFile(tmpDirname);
        tmpDir.copyFrom(gzFile, new FileTypeSelector(FileType.FILE));

        // Move the uncompressed file to the location desired.
        LocalFile tmpFile = (LocalFile) this.fsManager.resolveFile(tmpFilename);
        LocalFile decompFile = (LocalFile) this.fsManager.resolveFile(decompName);
        tmpFile.moveTo(decompFile);

        // Delete the temporary directory.
        tmpDir.delete(new AllFileSelector());

        // Delete the gzip file now that we have the uncompressed version.
        // Note that the plain file FileObject (extractFile) is used
        // for deleting instead of the gzip FileObject (gzFile).
        extractFile.delete(new AllFileSelector());
    } // decompressGzip(LocalFile extractFile)
}