How do I read data from a spreadsheet using the OpenXML Format SDK?

ProfK picture ProfK · Apr 12, 2010 · Viewed 39.2k times · Source

I need to read data from a single worksheet in an Excel 2007 workbook using the Open XML SDK 2.0. I have spent a lot of time searching for basic guidance on how to do this, but I have only found help on creating spreadsheets.

How do I iterate rows in a worksheet and then iterate the cells in each row, using this SDK?

Answer

saarp picture saarp · Nov 2, 2012

The other answer seemed more like a meta-answer. I have been struggling with this because LINQ alone does not work well across separate document parts (for example, shared strings are stored in a different part from the worksheet). The following code includes a wrapper function that gets the value from a Cell, resolving any shared-string lookups.

/// <summary>
/// Opens the workbook read-only, walks every sheet it declares, and writes
/// each row's cell values to the debug output.
/// </summary>
/// <remarks>
/// Only <c>Cell</c> elements that actually exist as children of a row are
/// visited, so the number of values printed can differ from row to row.
/// </remarks>
public void ExcelDocTest()
{
    Debug.WriteLine("Running through sheet.");
    int rowsComplete = 0;

    using (SpreadsheetDocument spreadsheetDocument =
                    SpreadsheetDocument.Open(@"path\to\Spreadsheet.xlsx", false))
    {
        WorkbookPart workBookPart = spreadsheetDocument.WorkbookPart;

        foreach (Sheet s in workBookPart.Workbook.Descendants<Sheet>())
        {
            // The sheet entry only carries a relationship id; the actual
            // worksheet content lives in the part that id points at.
            WorksheetPart wsPart = workBookPart.GetPartById(s.Id) as WorksheetPart;
            Debug.WriteLine("Worksheet {1}:{2} - id({0}) {3}", s.Id, s.SheetId, s.Name,
                wsPart == null ? "NOT FOUND!" : "found.");

            if (wsPart == null)
            {
                // Not a regular worksheet part (the 'as' cast failed); skip it.
                continue;
            }

            // Enumerate rows lazily; there is no need to materialise them first.
            foreach (Row row in wsPart.Worksheet.Descendants<Row>())
            {
                rowsComplete++;

                bool emptyRow = true;
                List<string> rowData = new List<string>();

                foreach (Cell c in row.Elements<Cell>())
                {
                    string value = GetCellValue(c);
                    emptyRow = emptyRow && string.IsNullOrWhiteSpace(value);
                    rowData.Add(value);
                }

                // Log the row's index attribute rather than the Row element itself,
                // whose string form does not identify the row.
                Debug.WriteLine("Row {0}: {1}", row.RowIndex,
                    emptyRow ? "EMPTY!" : string.Join(", ", rowData));
            }
        }
    }

    Debug.WriteLine("Done, processed {0} rows.", rowsComplete);
}

/// <summary>
/// Returns the text of a spreadsheet cell, resolving shared-string indexes
/// through the workbook's shared string table and mapping boolean cells to
/// "TRUE"/"FALSE".
/// </summary>
/// <param name="cell">The cell to read; may be null.</param>
/// <returns>
/// <c>null</c> when <paramref name="cell"/> is null; otherwise the cell's
/// stored value, the referenced shared string, or "TRUE"/"FALSE".
/// </returns>
/// <exception cref="InvalidOperationException">
/// The cell is a shared-string cell but is not attached to a worksheet that
/// belongs to a spreadsheet document.
/// </exception>
public static string GetCellValue(Cell cell)
{
    if (cell == null)
    {
        return null;
    }

    // Prefer the cell's value element: InnerText concatenates the text of every
    // child, so a cell that also carries a formula would return the formula text
    // fused with the value. Cells without a value element (e.g. inline strings)
    // still fall back to InnerText.
    string value = cell.CellValue != null ? cell.CellValue.Text : cell.InnerText;

    if (cell.DataType == null)
    {
        return value;
    }

    switch (cell.DataType.Value)
    {
        case CellValues.SharedString:
            // The stored value is an index into the shared string table, which
            // lives in a different part; reach it through the owning worksheet.
            Worksheet ws = cell.Ancestors<Worksheet>().FirstOrDefault();
            if (ws == null || ws.WorksheetPart == null)
            {
                throw new InvalidOperationException("Unable to find parent worksheet.");
            }

            SpreadsheetDocument ssDoc = ws.WorksheetPart.OpenXmlPackage as SpreadsheetDocument;
            if (ssDoc == null || ssDoc.WorkbookPart == null)
            {
                throw new InvalidOperationException("Unable to find parent worksheet.");
            }

            SharedStringTablePart sstPart =
                ssDoc.WorkbookPart.GetPartsOfType<SharedStringTablePart>().FirstOrDefault();

            // Look up the value in the shared string table; when the workbook has
            // no table the raw index text is returned unchanged.
            if (sstPart != null && sstPart.SharedStringTable != null)
            {
                // The index is machine-written, so parse it culture-independently.
                int index = int.Parse(value, System.Globalization.CultureInfo.InvariantCulture);

                // Index over string items only, not every child of the table.
                value = sstPart.SharedStringTable
                    .Elements<SharedStringItem>()
                    .ElementAt(index)
                    .InnerText;
            }
            break;

        case CellValues.Boolean:
            // Booleans are stored as "0"/"1"; anything other than "0" is true.
            value = value == "0" ? "FALSE" : "TRUE";
            break;
    }

    return value;
}

Edit: Thanks @Nitin-Jadhav for the correction to GetCellValue().