source: data_recovery/trunk/recover_log.py @ 69

Last change on this file since 69 was 28, checked in by retracile, 15 years ago

Data Recovery: a couple of data recovery utilities

  • Property svn:executable set to *
File size: 5.6 KB
RevLine 
[28]1#!/usr/bin/python
2"""Recovers a disk image from a log file generated by salvage_data.py.
3"""
4import sys
5
def coalesce_extents(one, two):
    """Return the single (start, end) extent spanning both arguments.

    The caller is responsible for making sure the two extents overlap or
    touch; no check is made here.
    """
    lowest_start = min(one[0], two[0])
    highest_end = max(one[1], two[1])
    return (lowest_start, highest_end)
9
def extents_overlap(one, two):
    """Tell whether two (start, end) extents touch or overlap.

    Returns True when either endpoint of one extent lies within the other
    extent (bounds inclusive, so merely adjacent extents count), and False
    otherwise.
    """
    def holds(extent, point):
        # Inclusive containment test of a single offset in an extent.
        return extent[0] <= point <= extent[1]

    return (holds(one, two[0]) or
            holds(one, two[1]) or
            holds(two, one[0]) or
            holds(two, one[1]))
17
class Extents(object):
    """Tracks the extents that have been covered by the log.

    Extents are kept in ``self.extents`` as (start, end) tuples.  Extents
    that overlap or touch are merged as they are added, so the stored
    extents never overlap one another.
    """
    def __init__(self):
        self.extents = []

    def add(self, offset, length):
        """Adds an extent at offset of the given length."""
        self.add_extent((offset, offset + length))

    def add_extent(self, extent):
        """Adds a (start, end) extent, merging it with any stored extents it
        overlaps or touches.  A zero-length extent is ignored.
        """
        start, end = extent
        if start == end: # nothing to add
            return

        # Single iterative pass instead of one recursive call per merge, so
        # an extent spanning many stored extents cannot exhaust the
        # recursion limit.  Stored extents never touch each other, so an
        # extent skipped earlier in the pass cannot touch the merged result
        # later on.
        merged = extent
        untouched = []
        for current in self.extents:
            if extents_overlap(merged, current):
                merged = coalesce_extents(current, merged)
            else:
                untouched.append(current)
        untouched.append(merged)
        # Update in place so outside references to the list stay valid.
        self.extents[:] = untouched

    def bytes_covered(self):
        """Returns the number of bytes covered by the extents."""
        return sum(end - start for start, end in self.extents)

    def byte_range(self):
        """Returns a tuple of the starting and ending offsets covered by these
        extents, or (0, 0) when no extents have been recorded.

        As a side effect the stored extents are sorted.
        """
        if not self.extents:
            # Nothing recorded yet; avoid indexing into an empty list.
            return (0, 0)
        self.extents.sort()
        return (self.extents[0][0], self.extents[-1][1])

    def __str__(self):
        # byte_range() sorts the extents, so they are listed in order below.
        start, end = self.byte_range()
        covered = self.bytes_covered()
        span = end - start
        return ' '.join([repr(e) for e in self.extents]) + \
            "\n%s of %s bytes covered (%s remain)" % (covered, span,
            span - covered)
62
def report_log(good, bad):
    """Print a summary of the good and bad extents to standard error."""
    summary = "Good extents: %s\nBad extents: %s\n" % (good, bad)
    sys.stderr.write(summary)
66
def write_image_from_log(log, image):
    """Reads from the log file object, and writes the data to the image file
    object.

    The log is a sequence of records, each introduced by a header line:

      "D <offset> <length>"  followed by <length> bytes of data and one
                             extra newline; the data is written to the
                             image at <offset>.
      "E <offset> [length]"  marks bytes that could not be read; length
                             defaults to 1.  Nothing is written for these.

    Reading stops at end of file or at the first blank header line.  A
    summary of the extents seen is written to standard error before
    returning, and also when an error aborts processing.

    Returns a (good_extents, bad_extents) tuple of Extents objects.
    Raises Exception on a truncated data record or an unknown record type.
    """
    good_extents = Extents()
    bad_extents = Extents()
    try:
        while True:
            meta = log.readline().split()
            if not meta:
                break
            record_type = meta[0]
            if record_type == 'D': # data
                # int() parses arbitrarily large offsets on both Python 2
                # and Python 3, where long() no longer exists.
                offset = int(meta[1])
                length = int(meta[2])
                data = log.read(length)
                if len(data) != length:
                    raise Exception("Short line: %s of %s bytes at offset %s" \
                        % (len(data), length, offset))
                log.read(1) # the extra newline

                sys.stderr.write("writing %s bytes at %s\n" % (length, offset))
                image.seek(offset)
                image.write(data)
                good_extents.add(offset, length)
            elif record_type == 'E':
                offset = int(meta[1])
                if len(meta) > 2:
                    length = int(meta[2])
                else:
                    length = 1
                sys.stderr.write("skipping %s bad bytes at %s\n" % (length,
                    offset))
                bad_extents.add(offset, length)
            else:
                raise Exception("Invalid line: %r" % (meta,))
    finally:
        # Report what was processed whether we finished or failed part way.
        report_log(good_extents, bad_extents)

    return good_extents, bad_extents
108
def write_log_from_image(image, out, good_extents, bad_extents,
                         max_extent_size=10*1024**2):
    """Write out a concise log file with the same information as the input
    file.

    image            -- file object to read the recovered data from.
    out              -- file object the log is written to; it is flushed
                        after every record.
    good_extents     -- object whose ``extents`` attribute lists the
                        (start, end) ranges that hold data.
    bad_extents      -- object whose ``extents`` attribute lists the
                        (start, end) ranges that could not be read.
    max_extent_size  -- largest number of bytes put in one "D" record;
                        longer good extents are split into several records.

    Records are emitted in sorted order: "E <offset> <length>" for bad
    extents and "D <offset> <length>" followed by the data and a newline
    for good ones.

    Raises ValueError if max_extent_size is not positive, and Exception if
    the image yields fewer bytes than an extent requires.
    """
    if max_extent_size <= 0:
        # A zero-sized chunk would never advance the offset below.
        raise ValueError("max_extent_size must be positive")

    tagged = [(s, e, 'D') for s, e in good_extents.extents]
    tagged += [(s, e, 'E') for s, e in bad_extents.extents]
    for start, end, state in sorted(tagged):
        if state == 'E':
            out.write("E %s %s\n" % (start, end - start))
            out.flush()
        elif state == 'D':
            offset = start
            while offset < end:
                chunk = min(max_extent_size, end - offset)
                image.seek(offset)
                data = image.read(chunk)
                if len(data) != chunk:
                    raise Exception("Short read from image file")
                out.write("D %s %s\n%s\n" % (offset, chunk, data))
                out.flush()
                offset += chunk
        else:
            raise Exception("INTERNAL ERROR: Invalid state \"%s\"" % state)
135
136
def usage(out):
    """Writes the command-line help message to the file object ``out``."""
    message = (
        "Syntax Error: %s [-l] <imagefilename>\n"
        "Requires the file to which to write the image, and reads the "
        "recovery log from stdin.\n"
        "/dev/null may be specified as the image filename to just show "
        "summary information.\n"
    )
    # Fill in the program name; the placeholder was previously printed as
    # a literal "%s".
    out.write(message % (sys.argv[0],))
145
def main(args):
    """Reads log from standard in, writes an image to the image file.

    args is the command line without the program name: an optional "-l"
    followed by the image filename.  With "-l", a condensed log rebuilt
    from the written image is also sent to standard out.  Any other
    argument layout prints the usage message and exits with status 1.
    """
    output_log = bool(args) and args[0] == '-l'
    if output_log:
        expected_count = 2
    else:
        expected_count = 1
    if len(args) != expected_count:
        usage(sys.stderr)
        sys.exit(1)
    filename = args[-1]

    # Binary mode: the image is raw disk data and must not go through
    # newline translation.  The handle is closed before the file is read
    # back so that no buffered writes are missing from it.
    with open(filename, 'wb') as image:
        good, bad = write_image_from_log(sys.stdin, image)

    if output_log:
        with open(filename, 'rb') as image:
            write_log_from_image(image, sys.stdout, good, bad)
170
if __name__ == '__main__':
    # Run as a script: pass everything after the program name to main().
    cli_args = sys.argv[1:]
    main(cli_args)
Note: See TracBrowser for help on using the repository browser.