MergeDataFiles.java

Go to the documentation of this file.
00001 package edu.rice.cs.hpc.data.util;
00002 
00003 import java.io.DataInputStream;
00004 import java.io.DataOutputStream;
00005 import java.io.File;
00006 import java.io.FileInputStream;
00007 import java.io.FileNotFoundException;
00008 import java.io.FileOutputStream;
00009 import java.io.IOException;
00010 import java.io.RandomAccessFile;
00011 
00012 
00013 
00014 /*****
00015  * Adaptation of TraceCompactor from traceviewer for more general purpose
00016  *  
00017  * Example of input files: 
00018  * 
00019  * s3d_f90.x-000002-000-a8c01d31-26093.hpctrace
00020  * 1.fft-000142-000-a8c0a230-23656.metric-db
00021  * 
00022  * Example of output file:
00023  * 
00024  * data.hpctrace.single
00025  * data.metric-db.single
00026  * 
00027  * @author laksono 
00028  *
00029  */
00030 public class MergeDataFiles {
00031     
00032     private static final int PAGE_SIZE_GUESS = 4096;
00033     
00034     private static final int PROC_POS = 5;
00035     private static final int THREAD_POS = 4;
00036     
00037     public enum MergeDataAttribute {SUCCESS_MERGED, SUCCESS_ALREADY_CREATED, FAIL_NO_DATA};
00038     
00039     /***
00040      * create a single file from multiple data files
00041      * 
00042      * @param directory
00043      * @param globInputFile: glob pattern
00044      * @param outputFile: output filename
00045      * 
00046      * @return
00047      * @throws IOException
00048      */
00049     static public MergeDataAttribute merge(File directory, String globInputFile, String outputFile, IProgressReport progress)
00050             throws IOException, FileNotFoundException {
00051         
00052         final int last_dot = globInputFile.lastIndexOf('.');
00053         final String suffix = globInputFile.substring(last_dot);
00054 
00055         final File fout = new File(outputFile);
00056         
00057         // check if the file already exists
00058         if (fout.canRead() )
00059         {
00060             if (isMergedFileCorrect(outputFile))            
00061                 return MergeDataAttribute.SUCCESS_ALREADY_CREATED;
00062             // the file exists but corrupted. In this case, we have to remove and create a new one
00063             throw new RuntimeException("MT file corrupted.");
00064         }
00065         
00066         // check if the files in glob patterns is correct
00067         File[] file_metric = directory.listFiles( new Util.FileThreadsMetricFilter(globInputFile) );
00068         if (file_metric == null || file_metric.length<1)
00069             return MergeDataAttribute.FAIL_NO_DATA;
00070         
00071         FileOutputStream fos = new FileOutputStream(outputFile);
00072         DataOutputStream dos = new DataOutputStream(fos);
00073         
00074         //-----------------------------------------------------
00075         // 1. write the header:
00076         //  int type (0: unknown, 1: mpi, 2: openmp, 3: hybrid, ...
00077         //  int num_files
00078         //-----------------------------------------------------
00079 
00080         int type = 0;
00081         dos.writeInt(type);
00082         
00083         progress.begin("Merging data files ...", file_metric.length);
00084         
00085         // on linux, we have to sort the files
00086         java.util.Arrays.sort(file_metric);
00087         
00088         dos.writeInt(file_metric.length);
00089         
00090         final long num_metric_header = 2 * Constants.SIZEOF_INT; // type of app (4 bytes) + num procs (4 bytes) 
00091         final long num_metric_index  = file_metric.length * (Constants.SIZEOF_LONG + 2 * Constants.SIZEOF_INT );
00092         long offset = num_metric_header + num_metric_index;
00093 
00094         int name_format = 0;  // FIXME hack:some hpcprof revisions have different format name !!
00095                 
00096         //-----------------------------------------------------
00097         // 2. Record the process ID, thread ID and the offset 
00098         //   It will also detect if the application is mp, mt, or hybrid
00099         //   no accelator is supported
00100         //  for all files:
00101         //      int proc-id, int thread-id, long offset
00102         //-----------------------------------------------------
00103         for(int i = 0; i < file_metric.length; ++i)
00104         {
00105             //get the core number and thread number
00106             final String filename = file_metric[i].getName();
00107             final int last_pos_basic_name = filename.length() - suffix.length();
00108             final String basic_name = file_metric[i].getName().substring(0, last_pos_basic_name);
00109             String []tokens = basic_name.split("-");
00110             
00111             final int num_tokens = tokens.length;
00112             if (num_tokens < PROC_POS)
00113                 // if it is wrong file with the right extension, we skip 
00114                 continue;
00115             
00116             int proc ;
00117             try {
00118                 proc = Integer.parseInt(tokens[name_format + num_tokens-PROC_POS]);
00119             } catch (NumberFormatException e) {
00120                 // old version of name format
00121                 name_format = 1; 
00122                 proc = Integer.parseInt(tokens[name_format + num_tokens-PROC_POS]);
00123             }
00124             dos.writeInt(proc);
00125             if (proc != 0)
00126                 type |= Constants.MULTI_PROCESSES;
00127             
00128             final int thread = Integer.parseInt(tokens[name_format + num_tokens-THREAD_POS]);
00129             dos.writeInt(thread);
00130             if (thread != 0)
00131                 type |= Constants.MULTI_THREADING;
00132             
00133 
00134             dos.writeLong(offset);
00135             offset += file_metric[i].length();
00136 
00137         }
00138         
00139         //-----------------------------------------------------
00140         // 3. Copy all data from the multiple files into one file
00141         //-----------------------------------------------------
00142         for(int i = 0; i < file_metric.length; ++i) {
00143             DataInputStream dis = new DataInputStream(new FileInputStream(file_metric[i]));
00144             byte[] data = new byte[PAGE_SIZE_GUESS];
00145             
00146             int numRead = dis.read(data);
00147             while(numRead > 0) {
00148                 dos.write(data, 0, numRead);
00149                 numRead = dis.read(data);
00150             }
00151             dis.close();
00152             
00153             progress.advance();
00154         }       
00155         insertMarker(dos);
00156         
00157         dos.close();
00158         
00159         //-----------------------------------------------------
00160         // 4. FIXME: write the type of the application
00161         //      the type of the application is computed in step 2
00162         //      Ideally, this step has to be in the beginning !
00163         //-----------------------------------------------------
00164         RandomAccessFile f = new RandomAccessFile(outputFile, "rw");
00165         f.writeInt(type);
00166         f.close();
00167         
00168         //-----------------------------------------------------
00169         // 5. remove old files
00170         //-----------------------------------------------------
00171         removeFiles(file_metric);
00172         
00173         progress.end();
00174         
00175         return MergeDataAttribute.SUCCESS_MERGED;
00176 
00177     }
00178     
00179     // pat2 7/24/13: The marker used to be:
00180     //static private long MARKER_END_MERGED_FILE = 0xDEADF00D;
00181     // but Java sign-extends the int to a long and it becomes
00182     // 0xFFFFFFFFDEADF00D NOT 0x00000000DEADF00D, like you'd probably guess.
00183     // Making it explicitly what it was implicitly before to avoid 
00184     // compatibility issues.
00186     static private long MARKER_END_MERGED_FILE = 0xFFFFFFFFDEADF00Dl;
00187     
00188     /***
00189      * insert a marker at the end of the file
00190      * @param dos: output stream. It has to be the end of the file
00191      * @throws IOException
00192      */
00193     static private void insertMarker(DataOutputStream dos) throws IOException
00194     {
00195         dos.writeLong(MARKER_END_MERGED_FILE);
00196     }
00197     
00198     
00199     /***
00200      * Check if a file is a good merged file
00201      * @param filename
00202      * @return
00203      * @throws IOException
00204      */
00205     static private boolean isMergedFileCorrect(String filename) throws IOException
00206     {
00207         final RandomAccessFile f = new RandomAccessFile(filename, "r");
00208         boolean isCorrect = false;
00209         
00210         final long pos = f.length() - Constants.SIZEOF_LONG;
00211         if (pos>0) {
00212             f.seek(pos);
00213             final long marker = f.readLong();
00214             isCorrect = (marker == MARKER_END_MERGED_FILE);
00215         }
00216         f.close();
00217         return isCorrect;
00218     }
00219     
00220     static private boolean removeFiles(File files[])
00221     {
00222         boolean success = true;
00223         
00224         for(File file: files) {
00225             success &= file.delete();
00226         }
00227         
00228         return success;
00229     }
00230 }

Generated on 5 May 2015 for HPCVIEWER by  doxygen 1.6.1