Read a file/URL line-by-line in Swift

Matt picture Matt · Jul 5, 2014 · Viewed 46.9k times · Source

I am trying to read a file given in an NSURL and load it into an array, with items separated by a newline character \n.

Here is the way I've done it so far:

var possList: NSString? = NSString.stringWithContentsOfURL(filePath.URL) as? NSString
if var list = possList {
    list = list.componentsSeparatedByString("\n") as NSString[]
    return list
}
else {
    //return empty list
}

I'm not very happy with this for a couple of reasons. One, I'm working with files that range from a few kilobytes to hundreds of MB in size. As you can imagine, working with strings this large is slow and unwieldy. Secondly, this freezes up the UI when it's executing--again, not good.

I've looked into running this code in a separate thread, but I've been having trouble with that, and besides, it still doesn't solve the problem of dealing with huge strings.

What I'd like to do is something along the lines of the following pseudocode:

var aStreamReader = new StreamReader(from_file_or_url)
while aStreamReader.hasNextLine == true {
    currentline = aStreamReader.nextLine()
    list.addItem(currentline)
}

How would I accomplish this in Swift?

A few notes about the files I'm reading from: All files consist of short (<255 chars) strings separated by either \n or \r\n. The length of the files range from ~100 lines to over 50 million lines. They may contain European characters, and/or characters with accents.

Answer

Martin R picture Martin R · Jul 9, 2014

(The code is for Swift 2.2/Xcode 7.3 now. Older versions can be found in the edit history if somebody needs it. An updated version for Swift 3 is provided at the end.)

The following Swift code is heavily inspired by the various answers to How to read data from NSFileHandle line by line?. It reads from the file in chunks, and converts complete lines to strings.

The default line delimiter (\n), string encoding (UTF-8) and chunk size (4096) can be set with optional parameters.

class StreamReader  {

    let encoding : UInt
    let chunkSize : Int

    var fileHandle : NSFileHandle!
    let buffer : NSMutableData!
    let delimData : NSData!
    var atEof : Bool = false

    init?(path: String, delimiter: String = "\n", encoding : UInt = NSUTF8StringEncoding, chunkSize : Int = 4096) {
        self.chunkSize = chunkSize
        self.encoding = encoding

        if let fileHandle = NSFileHandle(forReadingAtPath: path),
            delimData = delimiter.dataUsingEncoding(encoding),
            buffer = NSMutableData(capacity: chunkSize)
        {
            self.fileHandle = fileHandle
            self.delimData = delimData
            self.buffer = buffer
        } else {
            self.fileHandle = nil
            self.delimData = nil
            self.buffer = nil
            return nil
        }
    }

    deinit {
        self.close()
    }

    /// Return next line, or nil on EOF.
    func nextLine() -> String? {
        precondition(fileHandle != nil, "Attempt to read from closed file")

        if atEof {
            return nil
        }

        // Read data chunks from file until a line delimiter is found:
        var range = buffer.rangeOfData(delimData, options: [], range: NSMakeRange(0, buffer.length))
        while range.location == NSNotFound {
            let tmpData = fileHandle.readDataOfLength(chunkSize)
            if tmpData.length == 0 {
                // EOF or read error.
                atEof = true
                if buffer.length > 0 {
                    // Buffer contains last line in file (not terminated by delimiter).
                    let line = NSString(data: buffer, encoding: encoding)

                    buffer.length = 0
                    return line as String?
                }
                // No more lines.
                return nil
            }
            buffer.appendData(tmpData)
            range = buffer.rangeOfData(delimData, options: [], range: NSMakeRange(0, buffer.length))
        }

        // Convert complete line (excluding the delimiter) to a string:
        let line = NSString(data: buffer.subdataWithRange(NSMakeRange(0, range.location)),
            encoding: encoding)
        // Remove line (and the delimiter) from the buffer:
        buffer.replaceBytesInRange(NSMakeRange(0, range.location + range.length), withBytes: nil, length: 0)

        return line as String?
    }

    /// Start reading from the beginning of file.
    func rewind() -> Void {
        fileHandle.seekToFileOffset(0)
        buffer.length = 0
        atEof = false
    }

    /// Close the underlying file. No reading must be done after calling this method.
    func close() -> Void {
        fileHandle?.closeFile()
        fileHandle = nil
    }
}

Usage:

if let aStreamReader = StreamReader(path: "/path/to/file") {
    defer {
        aStreamReader.close()
    }
    while let line = aStreamReader.nextLine() {
        print(line)
    }
}

You can even use the reader with a for-in loop

for line in aStreamReader {
    print(line)
}

by implementing the SequenceType protocol (compare http://robots.thoughtbot.com/swift-sequences):

extension StreamReader : SequenceType {
    func generate() -> AnyGenerator<String> {
        return AnyGenerator {
            return self.nextLine()
        }
    }
}

Update for Swift 3/Xcode 8 beta 6: Also "modernized" to use guard and the new Data value type:

class StreamReader  {

    let encoding : String.Encoding
    let chunkSize : Int
    var fileHandle : FileHandle!
    let delimData : Data
    var buffer : Data
    var atEof : Bool

    init?(path: String, delimiter: String = "\n", encoding: String.Encoding = .utf8,
          chunkSize: Int = 4096) {

        guard let fileHandle = FileHandle(forReadingAtPath: path),
            let delimData = delimiter.data(using: encoding) else {
                return nil
        }
        self.encoding = encoding
        self.chunkSize = chunkSize
        self.fileHandle = fileHandle
        self.delimData = delimData
        self.buffer = Data(capacity: chunkSize)
        self.atEof = false
    }

    deinit {
        self.close()
    }

    /// Return next line, or nil on EOF.
    func nextLine() -> String? {
        precondition(fileHandle != nil, "Attempt to read from closed file")

        // Read data chunks from file until a line delimiter is found:
        while !atEof {
            if let range = buffer.range(of: delimData) {
                // Convert complete line (excluding the delimiter) to a string:
                let line = String(data: buffer.subdata(in: 0..<range.lowerBound), encoding: encoding)
                // Remove line (and the delimiter) from the buffer:
                buffer.removeSubrange(0..<range.upperBound)
                return line
            }
            let tmpData = fileHandle.readData(ofLength: chunkSize)
            if tmpData.count > 0 {
                buffer.append(tmpData)
            } else {
                // EOF or read error.
                atEof = true
                if buffer.count > 0 {
                    // Buffer contains last line in file (not terminated by delimiter).
                    let line = String(data: buffer as Data, encoding: encoding)
                    buffer.count = 0
                    return line
                }
            }
        }
        return nil
    }

    /// Start reading from the beginning of file.
    func rewind() -> Void {
        fileHandle.seek(toFileOffset: 0)
        buffer.count = 0
        atEof = false
    }

    /// Close the underlying file. No reading must be done after calling this method.
    func close() -> Void {
        fileHandle?.closeFile()
        fileHandle = nil
    }
}

extension StreamReader : Sequence {
    func makeIterator() -> AnyIterator<String> {
        return AnyIterator {
            return self.nextLine()
        }
    }
}