Index: data_recovery/trunk/README
===================================================================
--- data_recovery/trunk/README	(revision 28)
+++ data_recovery/trunk/README	(revision 28)
@@ -0,0 +1,2 @@
+Use salvage_data.py to read data off the disk, and stream a log of the data.
+Then feed that log to recover_log.py on standard input to rebuild the disk image that dd would have produced had the disk been healthy; with -l it also writes a more compact log file to standard output.
Index: data_recovery/trunk/recover_log.py
===================================================================
--- data_recovery/trunk/recover_log.py	(revision 28)
+++ data_recovery/trunk/recover_log.py	(revision 28)
@@ -0,0 +1,172 @@
+#!/usr/bin/python
+"""Recovers a disk image from a log file generated by salvage_data.py.
+"""
+import sys
+
+def coalesce_extents(one, two):
+    """Returns the extent spanning both inputs; ASSUMES the extents overlap."""
+    return min(one[0], two[0]), max(one[1], two[1])
+
+def extents_overlap(one, two):
+    """Returns True when the extents touch or overlap, False when they don't.
+    """
+    def within(point, extent):
+        return extent[0] <= point <= extent[1]
+    return within(two[0], one) or within(two[1], one) or \
+        within(one[0], two) or within(one[1], two)
+
+class Extents(object):
+    """Tracks the extents that have been covered by the log."""
+    def __init__(self):
+        self.extents = []
+
+    def add(self, offset, length):
+        """Adds an extent at offset of the given length."""
+        self.add_extent((offset, offset+length))
+
+    def add_extent(self, extent):
+        """Adds a (start, end) extent, merging it with any it touches."""
+        start, end = extent
+        if start == end: # nothing to add
+            return
+        # Extents added through this method never touch each other, so one pass
+        # that grows the new extent as it absorbs neighbours finds every merge.
+        kept = []
+        for current_extent in self.extents:
+            if extents_overlap(extent, current_extent):
+                extent = coalesce_extents(current_extent, extent)
+            else:
+                kept.append(current_extent)
+        self.extents[:] = kept + [extent]
+
+    def bytes_covered(self):
+        """Returns the number of bytes covered by the extents."""
+        return sum([b-a for a, b in self.extents])
+
+    def byte_range(self):
+        """Returns a (start, end) tuple of the offsets spanned by these
+        extents, or (0, 0) if there are none.  Sorts self.extents in place.
+        """
+        if not self.extents:
+            return (0, 0)
+        self.extents.sort()
+        return (self.extents[0][0], self.extents[-1][1])
+
+    def __str__(self):
+        """Lists the extents plus a covered/remaining byte summary."""
+        start, end = self.byte_range()
+        covered = self.bytes_covered()
+        return ' '.join([repr(e) for e in self.extents]) + \
+            "\n%s of %s bytes covered (%s remain)" % (covered, end-start,
+            end-start-covered)
+
+def report_log(good, bad):
+    """Lets the user know, on standard error, what extents are accounted for."""
+    sys.stderr.write("Good extents: %s\nBad extents: %s\n" % (good, bad))
+
+def write_image_from_log(log, image):
+    """Reads from the log file object, and writes the data to the image file
+    object.  "E" records, and the "S" records salvage_data.py writes for empty
+    reads, are tracked as bad bytes.  Returns (good_extents, bad_extents);
+    raises Exception on a malformed log, after reporting the extents seen.
+    """
+    good_extents = Extents()
+    bad_extents = Extents()
+    try:
+        while True:
+            meta = log.readline().split()
+            if not meta: # end of file, or a blank line
+                break
+            if meta[0] == 'D': # data
+                offset = long(meta[1])
+                length = long(meta[2])
+                data = log.read(length)
+                if len(data) != length:
+                    raise Exception("Short line: %s of %s bytes at offset %s" \
+                        % (len(data), length, offset))
+                log.read(1) # the extra newline
+
+                sys.stderr.write("writing %s bytes at %s\n" % (length, offset))
+                image.seek(offset)
+                image.write(data)
+                good_extents.add(offset, length)
+            elif meta[0] in ('E', 'S'): # read error / empty read
+                offset = long(meta[1])
+                if len(meta) > 2:
+                    length = long(meta[2])
+                else:
+                    length = 1
+                sys.stderr.write("skipping %s bad bytes at %s\n" % (length,
+                    offset))
+                bad_extents.add(offset, length)
+            else:
+                raise Exception("Invalid line: %r" % (meta,))
+    finally:
+        # always report, whether the log ended cleanly or an error is raised
+        report_log(good_extents, bad_extents)
+    return good_extents, bad_extents
+
+def write_log_from_image(image, out, good_extents, bad_extents):
+    """Writes a compact equivalent of the input log to out: an "E" record per
+    bad extent and "D" records of at most 10 MiB for the good ones, in
+    offset order.  Raises Exception if the image yields a short read.
+    """
+    max_extent_size = 10*1024**2
+    records = [(s, e, 'D') for s, e in good_extents.extents]
+    records.extend([(s, e, 'E') for s, e in bad_extents.extents])
+    for start, end, state in sorted(records):
+        if state == 'D':
+            # copy the extent out of the image in bounded chunks
+            position = start
+            while position < end:
+                size = min(max_extent_size, end-position)
+                image.seek(position)
+                payload = image.read(size)
+                if len(payload) != size:
+                    raise Exception("Short read from image file")
+                out.write("D %s %s\n%s\n" % (position, size, payload))
+                out.flush()
+                position += size
+        elif state == 'E':
+            out.write("E %s %s\n" % (start, end - start))
+            out.flush()
+        else:
+            raise Exception("INTERNAL ERROR: Invalid state \"%s\"" % state)
+
+
+def usage(out):
+    """Writes the help message, naming the running script, to out."""
+    out.write("Syntax Error: %s [-l] <imagefilename>\n"
+            "Requires the file to which to write the image, and reads the "
+            "recovery log from stdin.\n"
+            "/dev/null may be specified as the image filename to just show "
+            "summary information.\n"
+            % sys.argv[0])
+
+def main(args):
+    """Reads log from standard in, writes an image to the image file.
+    args is [imagefilename] or ['-l', imagefilename]; -l also writes a compact
+    log to standard out.  Exits with status 1 on bad arguments.
+    """
+    output_log = bool(args) and args[0] == '-l'
+    if output_log:
+        args = args[1:]
+    if len(args) != 1:
+        usage(sys.stderr)
+        sys.exit(1)
+    filename = args[0]
+    image = open(filename, 'wb')
+    try:
+        good, bad = write_image_from_log(sys.stdin, image)
+    finally:
+        # close (and so flush) the image before it is read back below
+        image.close()
+    if output_log:
+        source = open(filename, 'rb')
+        try:
+            write_log_from_image(source, sys.stdout, good, bad)
+        finally:
+            source.close()
+
+if __name__ == '__main__': # only when run as a script, not when imported
+    main(sys.argv[1:])
Index: data_recovery/trunk/salvage_data.py
===================================================================
--- data_recovery/trunk/salvage_data.py	(revision 28)
+++ data_recovery/trunk/salvage_data.py	(revision 28)
@@ -0,0 +1,114 @@
+#!/usr/bin/python
+"""salvage_data
+
+This is a tool for recovering data from a partially failing drive.  It will
+start at the beginning of the device, and start reading sequentially until it
+hits a problem, and will then split the remaining work to avoid bad sections as
+long as possible.
+"""
+import sys
+
+class WorkQueue(object):
+    """Queue for the extents left to attempt to read."""
+    def __init__(self, start, end):
+        self.queue = [(start, end)]
+
+    def is_empty(self):
+        """Lets us know when we are done"""
+        return not self.queue
+
+    def pop(self):
+        """Returns a (start, end) tuple of the next section to attempt"""
+        return self.queue.pop()
+
+    def push(self, start, end, split=False):
+        """Adds the (start, end) section to the end of the queue; when split,
+        its lower half goes to the front and its upper half to the end."""
+        if start >= end: # nothing to queue
+            return
+        if split:
+            middle = (start + end) // 2 # always an integer offset
+            if start < middle:
+                self.queue.insert(0, (start, middle))
+            if middle < end:
+                self.queue.append((middle, end))
+        else:
+            self.queue.append((start, end))
+
+def recover(drive, start, end, output):
+    """Salvages the data between the start and end offsets of the drive file
+    object, writing a log of the outcome to the output file object.
+
+    Each successful read is logged as "D offset length" followed by the data,
+    a read error as "E offset" and an empty read as "S offset".  After a
+    failure or a short read the rest of that range is re-queued in halves.
+    """
+    extent_size = 1024*1024 # most bytes to request in a single read
+    # ranges of the drive still waiting to be attempted
+    pending = WorkQueue(start, end)
+
+    while not pending.is_empty():
+        start, end = pending.pop()
+        # never ask for more than the range has left
+        chunk = min(end - start, extent_size)
+        try:
+            drive.seek(start)
+            data = drive.read(chunk)
+        except IOError:
+            # error reading at this offset: log it and step past one byte
+            output.write("E %s\n" % start)
+            output.flush()
+            pending.push(start+1, end, True)
+            continue
+
+        if data:
+            output.write("D %s %s\n%s\n" % (start, len(data), data))
+            output.flush()
+            # a short read probably means an error just past this data, so
+            # split what is left of the extent
+            pending.push(start+len(data), end, len(data)<chunk)
+        else:
+            # nothing was read; the original author thought this only
+            # happened due to a software bug
+            output.write("S %s\n" % start)
+            output.flush()
+            pending.push(start+1, end, True)
+
+def main(args):
+    """Takes the device to read as the first argument.  May optionally specify
+    the start and end offsets to recover.  The log is written to standard out;
+    exits with status 1 if no device is given.
+    """
+    if len(args) < 1:
+        sys.stderr.write("Syntax error: %s <inputfile> [start [end]]\n"
+            "Reads from the specified input file (device)\n"
+            "Optionally starts at the specified start offset and ends at the "
+            "specified end offset.\n"
+            "If not specified, start defaults to 0, and end defaults to the "
+            "end of the device.\n"
+            % sys.argv[0])
+        sys.exit(1)
+
+    device = args[0]
+
+    drive = open(device, 'rb') # raw bytes, not text
+    try:
+        # determine device size
+        drive.seek(0, 2)
+        end = drive.tell()
+        # see if the user has specified a range to recover
+        start = 0
+        try:
+            start = int(args[1])
+            end = int(args[2])
+        except IndexError:
+            pass
+
+        sys.stderr.write("Reading %s from %s to %s\n" % (device, start, end))
+        sys.stderr.flush()
+        recover(drive, start, end, sys.stdout)
+    finally:
+        drive.close()
+
+if __name__ == '__main__': # only when run as a script, not when imported
+    main(sys.argv[1:])
