casacore
MultiFileBase.h
Go to the documentation of this file.
1//# MultiFileBase.h: Abstract base class to combine multiple files in a single one
2//# Copyright (C) 2014
3//# Associated Universities, Inc. Washington DC, USA.
4//#
5//# This library is free software; you can redistribute it and/or modify it
6//# under the terms of the GNU Library General Public License as published by
7//# the Free Software Foundation; either version 2 of the License, or (at your
8//# option) any later version.
9//#
10//# This library is distributed in the hope that it will be useful, but WITHOUT
11//# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12//# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
13//# License for more details.
14//#
15//# You should have received a copy of the GNU Library General Public License
16//# along with this library; if not, write to the Free Software Foundation,
17//# Inc., 675 Massachusetts Ave, Cambridge, MA 02139, USA.
18//#
19//# Correspondence concerning AIPS++ should be addressed as follows:
20//# Internet email: aips2-request@nrao.edu.
21//# Postal address: AIPS++ Project Office
22//# National Radio Astronomy Observatory
23//# 520 Edgemont Road
24//# Charlottesville, VA 22903-2475 USA
25//#
26//# $Id: RegularFileIO.h 20551 2009-03-25 00:11:33Z Malte.Marquarding $
27
28#ifndef CASA_MULTIFILEBASE_H
29#define CASA_MULTIFILEBASE_H
30
31//# Includes
32#include <casacore/casa/aips.h>
33#include <casacore/casa/IO/ByteIO.h>
34#include <casacore/casa/BasicSL/String.h>
35#include <casacore/casa/Utilities/CountedPtr.h>
36#include <casacore/casa/vector.h>
37#include <casacore/casa/ostream.h>
38
39
40namespace casacore { //# NAMESPACE CASACORE - BEGIN
41
42 //# Forward declaration.
43 class AipsIO;
44 class HDF5Group;
45 class HDF5DataSet;
46
47
48 // <summary>
49 // Helper class for MultiFileInfo holding a data buffer
50 // </summary>
51 // <synopsis>
52 // The buffer can be allocated with posix_memalign (for O_DIRECT support).
53 // Hence the memory must be freed using free, which makes it impossible
54 // to use a shared_ptr to that memory. Hence it is encapsulated in this class.
55 // </synopsis>
57 MultiFileBuffer (size_t bufSize, Bool useODirect);
59 { if (data) free (data); }
60 // Data member
61 char* data;
62 private:
65 };
66
67 // <summary>
68 // Helper class for MultiFileBase containing info per internal file.
69 // </summary>
70 // <synopsis>
71 // This struct defines the various fields describing a logical file in a
72 // class derived from MultiFileBase (such as MultiFile or MultiHDF5).
73 // </synopsis>
74 // <use visibility=local>
76 // Initialize the object and create the buffer with the proper size.
77 // If useODirect=True (for use of O_DIRECT), the buffer is properly aligned and it
78 // is ensured that its size is a multiple of the alignment.
79 explicit MultiFileInfo (Int64 bufSize=0, Bool useODirect=False);
80 // Allocate the buffer.
81 void allocBuffer (Int64 bufSize, Bool useODirect=False)
82 { buffer = std::shared_ptr<MultiFileBuffer> (new MultiFileBuffer(bufSize, useODirect)); }
83 //# Data members.
84 vector<Int64> blockNrs; // physical blocknrs for this logical file
85 Int64 curBlock; // the data block held in buffer (<0 is none)
86 Int64 fsize; // file size (in bytes)
87 String name; // the virtual file name
88 Bool dirty; // has data in buffer been changed?
89 std::shared_ptr<MultiFileBuffer> buffer; // buffer holding a data block
92 };
93 void operator<< (ostream&, const MultiFileInfo&);
96
97
98 // <summary>
99 // Abstract base class to combine multiple files in a single one.
100 // </summary>
101
102 // <use visibility=export>
103
104 // <reviewed reviewer="" date="" tests="tMultiFile" demos="">
105 // </reviewed>
106
107 // <synopsis>
108 // This class is a container file holding multiple virtual files. It is
109 // primarily meant as a container file for the storage manager files of a
110 // table to reduce the number of files used (especially for Lustre) and to
111 // reduce the number of open files (especially when concatenating tables).
112 // <br>A secondary goal is offering the ability to use an IO buffer size
113 // that matches the file system well (large buffer size for e.g. ZFS).
114 //
115 // The SetupNewTable constructor has a StorageOption argument to define
116 // if a MultiFile has to be used and if so, the buffer size to use.
117 // It is also possible to specify that through aipsrc variables.
118 //
119 // A virtual file is spread over multiple (fixed size) data blocks in the
120 // MultiFile. A data block is never shared by multiple files.
121 // For each virtual file MultiFile keeps a MultiFileInfo object telling
122 // the file size and the block numbers used for the file. When flushing
123 // the MultiFile, this meta info is written into a header block and,
124 // if needed, continuation blocks. On open and resync, it is read back.
125 // <br>
126 //
127 // A virtual file is represented by an MFFileIO object, which is derived
128 // from ByteIO and as such part of the casacore IO framework. It makes it
129 // possible for applications to access a virtual file in the same way as
130 // a regular file.
131 //
132 // It is possible to delete a virtual file. Its blocks will be added to
133 // the free block list (which is also stored in the meta info).
134 // </synopsis>
135
136 // <example>
137 // In principle it is possible to use the MultiFile functions directly.
138 // However, in general it is much easier to use an MFFileIO object
139 // per virtual file as shown below.
140 // <srcblock>
141 // // Create a new MultiFile using a block size of 1 MB.
142 // MultiFile mfile("file.mf", ByteIO::New, 1048576);
143 // // Create a virtual file in it.
144 // MFFileIO mf1(mfile, "mf1", ByteIO::New);
145 // // Use it (for example) as the sink of AipsIO.
146 // AipsIO stream (&mf1);
147 // // Write values.
148 // stream << (Int)10;
149 // stream << True;
150 // // Seek to beginning of file and read data in.
151 // stream.setpos (0);
152 // Int vali;
153 // Bool valb;
154 // stream >> vali >> valb;
155 // </srcblock>
156 // </example>
157
158 // <todo>
159 // <li> write headers at alternating file positions (for robustness)
160 // <li> possibly write headers entirely at the end if larger than blocksize
161 // </todo>
162
163
165 {
166 public:
167 // Open or create a MultiFileBase with the given name.
168 // Upon creation the block size can be given. If 0, it uses the block size
169 // of the file system the file is on.
170 // If useODirect=True, it means that O_DIRECT is used. If the OS does not
171 // support it, the flag will always be False. If True, the data buffers will
172 // have a proper alignment and size (as needed by O_DIRECT).
174
175 // The destructor flushes and closes the file.
176 virtual ~MultiFileBase();
177
178 // Return the file id of a file in the MultiFileBase object.
179 // If the name is unknown, an exception is thrown if throwExcp is set.
180 // Otherwise it returns -1.
181 Int fileId (const String& name, Bool throwExcp=True) const;
182
183 // Add a file to the MultiFileBase object. It returns the file id.
184 // Only the base name of the given file name is used. In this way the
185 // MultiFileBase container file can be moved.
186 Int addFile (const String& name);
187
188 // Delete a file. It adds its blocks to the free block list.
190
191 // Read a block at the given offset. It returns the actual size read.
192 Int64 read (Int fileId, void* buffer, Int64 size, Int64 offset);
193
194 // Write a block at the given offset. It returns the actual size written.
195 Int64 write (Int fileId, const void* buffer, Int64 size, Int64 offset);
196
197 // Flush the file by writing all dirty data and all header info.
198 void flush();
199
200 // Resync with another process by clearing the buffers and rereading
201 // the header. The header is only read if its counter has changed.
202 void resync();
203
204 // Reopen the underlying file for read/write access.
205 // Nothing will be done if the file is writable already.
206 // Otherwise it will be reopened and an exception will be thrown
207 // if it is not possible to reopen it for read/write access.
208 virtual void reopenRW() = 0;
209
210 // Fsync the file (i.e., force the data to be physically written).
211 virtual void fsync() = 0;
212
213 // Get the file name of the MultiFileBase.
215 { return itsName; }
216
217 // Is the file writable?
219 { return itsWritable; }
220
221 // Will O_DIRECT be used?
223 { return itsUseODirect; }
224
225 // Get the block size used.
227 { return itsBlockSize; }
228
229 // Get the nr of virtual files.
230 uInt nfile() const;
231
232 // Get the total nr of data blocks used.
233 Int64 size() const
234 { return itsNrBlock; }
235
236 // Get the info object (for test purposes mainly).
237 const vector<MultiFileInfo>& info() const
238 { return itsInfo; }
239
240 // Get the free blocks (for test purposes mainly).
241 const vector<Int64>& freeBlocks() const
242 { return itsFreeBlocks; }
243
244 private:
246 {
247 writeBlock (info, info.curBlock, info.buffer->data);
248 info.dirty = False;
249 }
250
251 // Do the class-specific actions on adding a file.
252 virtual void doAddFile (MultiFileInfo&) = 0;
253 // Do the class-specific actions on deleting a file.
254 virtual void doDeleteFile (MultiFileInfo&) = 0;
255 // Flush the file itself.
256 virtual void flushFile() = 0;
257 // Flush and close the file.
258 virtual void close() = 0;
259 // Write the header info.
260 virtual void writeHeader() = 0;
261 // Read the header info. If always==False, the info is only read if the
262 // header counter has changed.
263 virtual void readHeader (Bool always=True) = 0;
264 // Extend the virtual file to fit lastblk.
265 virtual void extend (MultiFileInfo& info, Int64 lastblk) = 0;
266 // Write a data block.
267 virtual void writeBlock (MultiFileInfo& info, Int64 blknr,
268 const void* buffer) = 0;
269 // Read a data block.
270 virtual void readBlock (MultiFileInfo& info, Int64 blknr,
271 void* buffer) = 0;
272
273 protected:
274 // Set the flags and blockSize for a new MultiFile/HDF5.
276
277 //# Data members
279 Int64 itsBlockSize; // The blocksize used
280 Int64 itsNrBlock; // The total nr of blocks actually used
281 Int64 itsHdrCounter; // Counter of header changes
282 vector<MultiFileInfo> itsInfo;
283 std::shared_ptr<MultiFileBuffer> itsBuffer;
284 Bool itsUseODirect; // use O_DIRECT?
285 Bool itsWritable; // Is the file writable?
286 Bool itsChanged; // Has header info changed since last flush?
287 vector<Int64> itsFreeBlocks;
288 };
289
290
291} //# NAMESPACE CASACORE - END
292
293#endif
Reference-counted pointer for constant data.
Definition: CountedPtr.h:81
Abstract base class to combine multiple files in a single one.
Int64 blockSize() const
Get the block size used.
virtual void extend(MultiFileInfo &info, Int64 lastblk)=0
Extend the virtual file to fit lastblk.
virtual void writeHeader()=0
Write the header info.
MultiFileBase(const String &name, Int blockSize, Bool useODirect)
Open or create a MultiFileBase with the given name.
const vector< Int64 > & freeBlocks() const
Get the free blocks (for test purposes mainly).
virtual void fsync()=0
Fsync the file (i.e., force the data to be physically written).
virtual void readHeader(Bool always=True)=0
Read the header info.
Int64 write(Int fileId, const void *buffer, Int64 size, Int64 offset)
Write a block at the given offset.
void resync()
Resync with another process by clearing the buffers and rereading the header.
vector< Int64 > itsFreeBlocks
vector< MultiFileInfo > itsInfo
Int addFile(const String &name)
Add a file to the MultiFileBase object.
Int fileId(const String &name, Bool throwExcp=True) const
Return the file id of a file in the MultiFileBase object.
const vector< MultiFileInfo > & info() const
Get the info object (for test purposes mainly).
Int64 size() const
Get the total nr of data blocks used.
void deleteFile(Int fileId)
Delete a file.
Int64 read(Int fileId, void *buffer, Int64 size, Int64 offset)
Read a block at the given offset.
String fileName() const
Get the file name of the MultiFileBase.
Bool useODirect() const
Will O_DIRECT be used?
virtual ~MultiFileBase()
The destructor flushes and closes the file.
Bool isWritable() const
Is the file writable?
virtual void doDeleteFile(MultiFileInfo &)=0
Do the class-specific actions on deleting a file.
virtual void writeBlock(MultiFileInfo &info, Int64 blknr, const void *buffer)=0
Write a data block.
virtual void reopenRW()=0
Reopen the underlying file for read/write access.
virtual void readBlock(MultiFileInfo &info, Int64 blknr, void *buffer)=0
Read a data block.
virtual void flushFile()=0
Flush the file itself.
virtual void close()=0
Flush and close the file.
void setNewFile()
Set the flags and blockSize for a new MultiFile/HDF5.
virtual void doAddFile(MultiFileInfo &)=0
Do the class-specific actions on adding a file.
void flush()
Flush the file by writing all dirty data and all header info.
uInt nfile() const
Get the nr of virtual files.
std::shared_ptr< MultiFileBuffer > itsBuffer
void writeDirty(MultiFileInfo &info)
String: the storage and methods of handling collections of characters.
Definition: String.h:225
free(pool)
this file contains all the compiler specific defines
Definition: mainpage.dox:28
const Bool False
Definition: aipstype.h:44
AipsIO & operator>>(AipsIO &os, Record &rec)
Definition: Record.h:465
ostream & operator<<(ostream &os, const IComplex &)
Show on ostream.
unsigned int uInt
Definition: aipstype.h:51
long long Int64
Define the extra non-standard types used by Casacore (like proposed uSize, Size)
Definition: aipsxtype.h:38
int Int
Definition: aipstype.h:50
bool Bool
Define the standard types used by Casacore.
Definition: aipstype.h:42
const Bool True
Definition: aipstype.h:43
char * data
Data member.
Definition: MultiFileBase.h:61
MultiFileBuffer(const MultiFileBuffer &)
MultiFileBuffer(size_t bufSize, Bool useODirect)
MultiFileBuffer & operator=(const MultiFileBuffer &)
Helper class for MultiFileBase containing info per internal file.
Definition: MultiFileBase.h:75
void allocBuffer(Int64 bufSize, Bool useODirect=False)
Allocate the buffer.
Definition: MultiFileBase.h:81
vector< Int64 > blockNrs
Definition: MultiFileBase.h:84
std::shared_ptr< MultiFileBuffer > buffer
Definition: MultiFileBase.h:89
CountedPtr< HDF5Group > group
Definition: MultiFileBase.h:90
MultiFileInfo(Int64 bufSize=0, Bool useODirect=False)
Initialize the object and create the buffer with the proper size.
CountedPtr< HDF5DataSet > dataSet
Definition: MultiFileBase.h:91