[28] | 1 | #!/usr/bin/python |
---|
| 2 | """salvage_data |
---|
| 3 | |
---|
| 4 | This is a tool for recovering data from a partially failing drive. It will |
---|
| 5 | start at the beginning of the device, and start reading sequentially until it |
---|
| 6 | hits a problem, and will then split the remaining work to avoid bad sections as |
---|
| 7 | long as possible. |
---|
| 8 | """ |
---|
| 9 | import sys |
---|
| 10 | |
---|
class WorkQueue(object):
    """Queue of (start, end) extents that still have to be read.

    Extents are half-open offset ranges. ``pop`` always takes the most
    recently appended extent (the end of the list), so work pushed without
    splitting is continued immediately.
    """

    def __init__(self, start, end):
        """Create a queue holding the single extent (start, end)."""
        self.queue = [(start, end)]

    def is_empty(self):
        """Return True once no extents are left to attempt."""
        return not self.queue

    def pop(self):
        """Remove and return the next (start, end) extent to attempt."""
        return self.queue.pop()

    def push(self, start, end, split=False):
        """Queue the extent (start, end), optionally split in two.

        An empty or inverted extent (start >= end) is ignored. When
        ``split`` is true the extent is cut at its midpoint: the upper half
        is appended, so it is popped next, and the lower half is inserted
        at the front, so it is popped only after everything else queued.
        """
        if start >= end:
            # nothing to queue
            return
        if not split:
            self.queue.append((start, end))
            return
        # Floor division keeps offsets integral; plain "/" yields a float
        # on Python 3, which cannot be used as a seek offset or read size.
        middle = (start + end) // 2
        if start < middle:
            self.queue.insert(0, (start, middle))
        if middle < end:
            self.queue.append((middle, end))
---|
| 37 | |
---|
def _write_data_record(output, offset, data):
    """Append one ``D`` record carrying ``data`` to the log and flush it.

    The record is the line ``D <offset> <length>`` followed by the data
    and a newline. Data that is not ``str`` (bytes read from a binary-mode
    file on Python 3) is written unmodified to ``output.buffer`` when the
    stream has one; formatting it with ``%s`` would log its ``b'...'``
    repr instead of the recovered bytes. Without a binary buffer the bytes
    are decoded as latin-1, which maps each byte to one character.
    """
    output.write("D %s %s\n" % (offset, len(data)))
    if isinstance(data, str):
        output.write(data)
    else:
        binary_sink = getattr(output, "buffer", None)
        if binary_sink is None:
            output.write(data.decode("latin-1"))
        else:
            # Push the header through the text layer first so the raw
            # bytes land after it in the underlying buffer.
            output.flush()
            binary_sink.write(data)
    output.write("\n")
    output.flush()


def recover(drive, start, end, output, extent_size=1024 * 1024):
    """Read what can be read of ``drive`` in [start, end) and log it.

    The range is attempted in reads of at most ``extent_size`` bytes. Each
    attempt writes one record to ``output`` and flushes it:

    * ``E <offset>`` -- seeking to or reading at the offset raised IOError.
    * ``S <offset>`` -- the read returned no data.
    * ``D <offset> <length>`` followed by the data and a newline -- the
      read returned data.

    After an ``E`` or ``S`` record the remainder of the extent, starting
    one past the failing offset, is split in two and requeued. After a
    ``D`` record the remainder is requeued, split only when the read came
    back shorter than requested.

    Args:
        drive: seekable file object to read from.
        start: first offset to attempt.
        end: offset at which to stop (exclusive).
        output: text stream receiving the log records.
        extent_size: largest single read to attempt; defaults to 1 MiB.

    Raises:
        ValueError: if ``extent_size`` is smaller than 1.
    """
    if extent_size < 1:
        raise ValueError("extent_size must be at least 1")

    # queue of data ranges to try to recover
    pending = WorkQueue(start, end)

    while not pending.is_empty():
        start, end = pending.pop()
        chunk = min(extent_size, end - start)
        try:
            drive.seek(start)
            data = drive.read(chunk)
        except IOError:
            # error reading at this offset
            output.write("E %s\n" % start)
            output.flush()
            pending.push(start + 1, end, True)
            continue

        if not data:
            # Nothing came back although no error was raised; the original
            # author believed this only happened due to a software bug.
            output.write("S %s\n" % start)
            output.flush()
            pending.push(start + 1, end, True)
            continue

        _write_data_record(output, start, data)
        # if we had a short read, probably an error at the end of this, so
        # split the extent
        pending.push(start + len(data), end, len(data) < chunk)
---|
| 76 | |
---|
def main(args):
    """Run a recovery from command-line style arguments.

    Args:
        args: ``[inputfile]``, ``[inputfile, start]`` or
            ``[inputfile, start, end]``. ``start`` defaults to 0 and
            ``end`` to the size of the input file (device).

    With no arguments a usage message is written to stderr and the process
    exits with status 1. Progress information goes to stderr and the
    recovery log to stdout.

    Raises:
        ValueError: if ``start`` or ``end`` is not an integer.
    """
    if not args:
        usage = (
            "Syntax error: %s <inputfile> [start [end]]\n"
            "Reads from the specified input file (device)\n"
            "Optionally starts at the specified start offset and ends at the "
            "specified end offset.\n"
            "If not specified, start defaults to 0, and end defaults to the "
            "end of the device.\n"
        )
        # Fill in the program name; the placeholder was previously printed
        # as a literal "%s".
        sys.stderr.write(usage % sys.argv[0])
        sys.exit(1)

    device = args[0]

    # Binary mode: offsets are byte offsets and the contents are neither
    # decoded nor newline-translated. The with-block closes the device
    # even if recovery raises.
    with open(device, 'rb') as drive:
        # determine device size by seeking to the end (whence=2)
        drive.seek(0, 2)
        end = drive.tell()

        # see if the user has specified a range to recover
        start = 0
        if len(args) > 1:
            start = int(args[1])
        if len(args) > 2:
            end = int(args[2])

        sys.stderr.write("Reading %s from %s to %s\n" % (device, start, end))
        sys.stderr.flush()

        recover(drive, start, end, sys.stdout)
---|
| 112 | |
---|
# Script entry point: pass the command-line arguments, without the program
# name, to main().
if __name__ == "__main__":
    cli_args = sys.argv[1:]
    main(cli_args)
---|