source: data_recovery/trunk/salvage_data.py

Last change on this file was 28, checked in by retracile, 15 years ago

Data Recovery: a couple of data recovery utilities

  • Property svn:executable set to *
File size: 3.5 KB
RevLine 
[28]1#!/usr/bin/python
2"""salvage_data
3
4This is a tool for recovering data from a partially failing drive.  It will
5start at the beginning of the device, and start reading sequentially until it
6hits a problem, and will then split the remaining work to avoid bad sections as
7long as possible.
8"""
9import sys
10
class WorkQueue(object):
    """Queue for the extents left to attempt to read.

    An extent is a ``(start, end)`` pair of offsets; ``end`` is exclusive, so
    an extent with ``start >= end`` is empty and is never queued.  ``pop``
    takes extents from the tail of the internal list.
    """

    def __init__(self, start, end):
        """Creates the queue holding the single extent (start, end), if it is
        not empty.
        """
        self.queue = []
        # Go through push() so the initial extent is subject to the same
        # "nothing to queue" rule as every later one.
        self.push(start, end)

    def is_empty(self):
        """Lets us know when we are done"""
        return not self.queue

    def pop(self):
        """Returns a (start, end) tuple of the next section to attempt"""
        return self.queue.pop()

    def push(self, start, end, split=False):
        """Adds the start, end section to the queue, splitting if told to.

        When split is true the section is cut at its midpoint: the upper half
        goes to the tail (so it is popped next) and the lower half goes to the
        head (so it is popped only after everything else already queued).
        """
        if start >= end:
            # nothing to queue
            return
        if not split:
            self.queue.append((start, end))
            return
        # Floor division keeps the midpoint an integer offset regardless of
        # whether "/" means integer or true division in the running Python.
        middle = (start + end) // 2
        if start < middle:
            self.queue.insert(0, (start, middle))
        if middle < end:
            self.queue.append((middle, end))
37
def recover(drive, start, end, output):
    """Salvages the bytes between start and end from the drive file object
    and writes a log of the results to the output file object.

    Each attempt reads at most 1 MiB.  The log holds one record per attempt:

      "E <offset>"             the seek/read at <offset> raised IOError
      "S <offset>"             the read at <offset> returned no data
      "D <offset> <length>"    followed by a line containing the data read

    After an "E" or "S" record the byte at <offset> is skipped and the rest
    of the extent is split in two before being queued again.
    """
    extent_size = 1024 * 1024  # largest read attempted in one go
    pending = WorkQueue(start, end)  # extents still to be tried

    while not pending.is_empty():
        start, end = pending.pop()
        chunk = min(end - start, extent_size)

        try:
            drive.seek(start)
            data = drive.read(chunk)
        except IOError:
            # error reading at this offset
            output.write("E %s\n" % start)
            output.flush()
            pending.push(start + 1, end, True)
            continue

        if not data:
            # nothing came back without an exception being raised; logged
            # separately from a real read error
            output.write("S %s\n" % start)
            output.flush()
            pending.push(start + 1, end, True)
            continue

        received = len(data)
        output.write("D %s %s\n%s\n" % (start, received, data))
        output.flush()
        # a short read probably means there is an error just past the data we
        # got, so split what is left of the extent
        pending.push(start + received, end, received < chunk)
76
def main(args):
    """Command-line entry point.

    args is the argument list without the program name: the device (or file)
    to read, optionally followed by the start offset and then the end offset
    of the range to recover.  start defaults to 0 and end defaults to the size
    of the device.  The recovery log is written to stdout and a one-line
    summary of the range to stderr.

    Exits with status 1 after printing usage help when no device is given.
    A start or end that is not an integer raises ValueError.
    """
    if len(args) < 1:
        usage = (
            "Syntax error: %s <inputfile> [start [end]]\n"
            "Reads from the specified input file (device)\n"
            "Optionally starts at the specified start offset and ends at the "
            "specified end offset.\n"
            "If not specified, start defaults to 0, and end defaults to the "
            "end of the device.\n"
        )
        # Fill in the program name; the placeholder was previously printed
        # as a literal "%s".
        sys.stderr.write(usage % sys.argv[0])
        sys.exit(1)

    device = args[0]

    # Binary mode: the device holds raw bytes and offsets must be byte
    # offsets, with no newline translation or text decoding.
    drive = open(device, 'rb')
    try:
        # determine device size (whence=2 seeks relative to the end)
        drive.seek(0, 2)
        end = drive.tell()

        # see if the user has specified a range to recover; a missing
        # argument simply leaves the corresponding default in place
        start = 0
        try:
            start = int(args[1])
            end = int(args[2])
        except IndexError:
            pass

        sys.stderr.write("Reading %s from %s to %s\n" % (device, start, end))
        sys.stderr.flush()

        output = sys.stdout

        recover(drive, start, end, output)
    finally:
        # release the device even if recovery is interrupted or fails
        drive.close()
112
# Only run the recovery when executed as a script; the program name is
# stripped so main() sees just the user-supplied arguments.
if __name__ == "__main__":
    main(sys.argv[1:])
Note: See TracBrowser for help on using the repository browser.