Reading a UTF-8 File in C#

user picture user · Nov 15, 2013 · Viewed 8.2k times · Source

I have a file that uses UTF-8 encoding, and I am trying to read it. So far I've tried BinaryReader, FileStream, and File.ReadAllLines, but in every case I only get the first line of the file.

Here are some samples of what I've tried so far:

/// <summary>
/// Test form: each button handler tries a different way of reading the file
/// named by <c>filename</c> and writes the result into <c>richTextBox1</c>.
/// </summary>
/// <remarks>
/// <c>richTextBox1</c> is not declared in this part of the class; presumably it
/// lives in the designer-generated part.
/// NOTE(review): if the file contains embedded NUL characters, the RichTextBox may
/// stop displaying text at the first one, which would look like "only the first
/// line is read". Verify with the byte dump produced by <c>button3_Click</c>.
/// </remarks>
public partial class Form1 : Form
    {
        private string filename = @"C:\UNICORN\Server\Fil\Users30.mpm";

        public Form1()
        {
            InitializeComponent();
        }

        /// <summary>Empties the given rich text box.</summary>
        /// <param name="rtb">The control whose text is cleared.</param>
        private static void clearText(RichTextBox rtb)
        {
            rtb.Text = string.Empty;
        }

        /// <summary>
        /// Decodes a byte array as UTF-8 text.
        /// </summary>
        /// <param name="data">The raw bytes to decode.</param>
        /// <returns>The decoded text.</returns>
        private static string DecodeUtf8(byte[] data)
        {
            // Going through a StreamReader consumes a leading UTF-8 byte order mark
            // instead of returning it as a U+FEFF character in the text.
            using (StreamReader reader = new StreamReader(new MemoryStream(data), Encoding.UTF8))
            {
                return reader.ReadToEnd();
            }
        }

        /// <summary>
        /// Reads the whole file with a BinaryReader and shows its size followed by
        /// its content decoded as UTF-8.
        /// </summary>
        private void button1_Click(object sender, EventArgs e)
        {
            clearText(richTextBox1);
            StringBuilder sb = new StringBuilder();

            // OpenRead asks for read access only, so read-only files can be opened too.
            using (BinaryReader br = new BinaryReader(File.OpenRead(filename)))
            {
                byte[] bytes = br.ReadBytes((int)br.BaseStream.Length);

                sb.Append("File Size: " + bytes.Length + "\n");
                // The file is UTF-8; decoding it as ASCII would turn every
                // non-ASCII character into '?'.
                sb.AppendLine(DecodeUtf8(bytes));
            }

            richTextBox1.Text = sb.ToString();
        }

        /// <summary>
        /// Reads the file as a sequence of 32-bit integers with a BinaryReader and
        /// shows each one as a character on its own line.
        /// </summary>
        private void button2_Click(object sender, EventArgs e)
        {
            clearText(richTextBox1);
            StringBuilder sb = new StringBuilder();

            using (BinaryReader br = new BinaryReader(File.OpenRead(filename)))
            {
                long length = br.BaseStream.Length;
                sb.Append("File Size: " + length + "\n");

                // Stop before a trailing partial word: ReadInt32 throws
                // EndOfStreamException when fewer than four bytes remain.
                for (long pos = 0; pos + sizeof(int) <= length; pos += sizeof(int))
                {
                    int v = br.ReadInt32();
                    // Only the low 16 bits fit in a char; unchecked makes the
                    // truncation explicit regardless of compiler settings.
                    sb.Append(unchecked((char)v)).Append('\n');
                }
            }

            richTextBox1.Text = sb.ToString();
        }

        /// <summary>
        /// Dumps the file byte by byte with a BinaryReader, showing each byte's
        /// numeric value and the character with the same code.
        /// </summary>
        private void button3_Click(object sender, EventArgs e)
        {
            clearText(richTextBox1);
            StringBuilder sb = new StringBuilder();

            using (BinaryReader br = new BinaryReader(File.OpenRead(filename)))
            {
                long length = br.BaseStream.Length;
                sb.Append("File Size: " + length + "\n");

                for (long i = 0; i < length; i++)
                {
                    byte b = br.ReadByte();
                    sb.Append("Byte: " + b + " - " + (char)b + "\n");
                }
            }

            richTextBox1.Text = sb.ToString();
        }

        /// <summary>
        /// Reads every line of the file with a UTF-8 StreamReader.
        /// </summary>
        private void button4_Click(object sender, EventArgs e)
        {
            clearText(richTextBox1);
            StringBuilder sb = new StringBuilder();

            using (StreamReader sr = new StreamReader(filename, Encoding.UTF8))
            {
                // A single ReadLine call returns only the first line; keep reading
                // until ReadLine signals the end of the file with null.
                string line;
                while ((line = sr.ReadLine()) != null)
                {
                    sb.Append(line).Append('\n');
                }
            }

            richTextBox1.Text = sb.ToString();
        }

        /// <summary>
        /// Reads every line of the file with a StreamReader created over a
        /// read-only file stream.
        /// </summary>
        private void button5_Click(object sender, EventArgs e)
        {
            clearText(richTextBox1);
            StringBuilder sb = new StringBuilder();

            using (StreamReader reader = new StreamReader(File.OpenRead(filename)))
            {
                string line;
                while ((line = reader.ReadLine()) != null)
                {
                    sb.AppendLine(line);
                }
            }

            richTextBox1.Text = sb.ToString();
        }

        /// <summary>
        /// Reads up to 100 bytes starting at offset 55 with a FileStream, decodes
        /// them with a UTF-8 Decoder and shows the characters separated by spaces.
        /// Any exception raised while reading is shown in the text box instead.
        /// </summary>
        private void button6_Click(object sender, EventArgs e)
        {
            const int startOffset = 55;
            const int bytesToRead = 100;

            clearText(richTextBox1);
            StringBuilder sb = new StringBuilder();
            byte[] byData = new byte[bytesToRead];
            int bytesRead = 0;

            try
            {
                // The using block closes the file handle even when Seek/Read throws.
                using (FileStream aFile = new FileStream(filename, FileMode.Open, FileAccess.Read))
                {
                    aFile.Seek(startOffset, SeekOrigin.Begin);
                    bytesRead = aFile.Read(byData, 0, byData.Length);
                }
            }
            catch (Exception ex)
            {
                // Best effort: report the failure in the text box rather than crash.
                sb.Append("ERROR: " + ex.ToString());
            }

            // Decode only the bytes actually read; decoding the whole buffer would
            // add a NUL character for every unused byte.
            Decoder d = Encoding.UTF8.GetDecoder();
            char[] charData = new char[Encoding.UTF8.GetMaxCharCount(bytesRead)];
            int charCount = d.GetChars(byData, 0, bytesRead, charData, 0);

            for (int i = 0; i < charCount; i++)
            {
                sb.Append(charData[i]).Append(' ');
            }

            richTextBox1.Text = sb.ToString();
        }

        /// <summary>
        /// Shows the encoding a StreamReader reports for the file when byte order
        /// mark detection is enabled.
        /// </summary>
        private void button7_Click(object sender, EventArgs e)
        {
            clearText(richTextBox1);
            StringBuilder sb = new StringBuilder();

            using (var r = new StreamReader(filename, detectEncodingFromByteOrderMarks: true))
            {
                // Detection only runs on the first read; without this Peek,
                // CurrentEncoding would still be the reader's initial default.
                r.Peek();

                var es = r.CurrentEncoding;
                sb.Append("Encoding: " + es);
            }

            richTextBox1.Text = sb.ToString();
        }

        /// <summary>
        /// Reads every line of the file with File.ReadAllLines as UTF-8.
        /// </summary>
        private void button8_Click(object sender, EventArgs e)
        {
            clearText(richTextBox1);
            StringBuilder sb = new StringBuilder();

            foreach (string line in File.ReadAllLines(filename, Encoding.UTF8))
            {
                sb.Append(line).Append('\n');
            }

            richTextBox1.Text = sb.ToString();
        }
    }

All of these examples only show me one line of the file. How would you go about reading/parsing the whole file?

The file contains a list of users and I'm ultimately trying to read the file to find those user names. Right now, it's only showing the first line of the file which is "UNICORN 3.06".

Answer

Guffa picture Guffa · Nov 15, 2013

If you just want the text, the ReadAllText method reads an entire file as utf-8:

    /// <summary>
    /// Loads the complete file as text and displays it in the rich text box.
    /// </summary>
    private void button1_Click(object sender, EventArgs e)
    {
        // ReadAllText returns the whole file content as a single string.
        string contents = File.ReadAllText(filename);
        richTextBox1.Text = contents;
    }

If you also want information about the file length, read the file as bytes and decode it as utf-8:

    /// <summary>
    /// Reads the file as raw bytes, then displays the byte count followed by the
    /// content decoded as UTF-8.
    /// </summary>
    private void button1_Click(object sender, EventArgs e)
    {
        byte[] data = File.ReadAllBytes(filename);

        // Decode through a StreamReader rather than Encoding.UTF8.GetString:
        // GetString keeps a leading byte order mark as a U+FEFF character,
        // while the reader consumes it.
        string text;
        using (StreamReader reader = new StreamReader(new MemoryStream(data), Encoding.UTF8))
        {
            text = reader.ReadToEnd();
        }

        richTextBox1.Text = "File size: " + data.Length + "\r\n" + text;
    }