Get datatype from values passed as string

callisto picture callisto · Mar 15, 2011 · Viewed 20.8k times · Source

I am writing a framework that will connect to many different data source types and return values from these sources. The easy ones are SQL, Access and Oracle. The tougher ones are Sharepoint, CSV.

If I return values from text based sources, I would like to determine the datatype of the data.

Since a CSV is all text, there is no metadata to interrogate, I would need to parse the data somehow to determine the data type.

Example:

List of "true", "true", "false", "false" would be boolean
List of "1", "0", "1", "0" would be boolean
List of "1", "4", "-10", "500" would be integer
List of "15.2", "2015.5896", "1.0245", "500" would be double
List of "2001/01/01", "2010/05/29 12:00", "1989/12/25 10:34:21" would be datetime

It is based on https://stackoverflow.com/questions/606365/c-doubt-finding-the-datatype/606381#606381

object ParseString(string str)
{

   Int32 intValue;
   Int64 bigintValue;
   double doubleValue;
   bool boolValue;
   DateTime dateValue;        

// Place checks higher in if-else statement to give higher priority to type.

if (Int32.TryParse(str, out intValue))
    return intValue;
else if (Int64.TryParse(str, out bigintValue))
    return bigintValue;
else if (double.TryParse(str, out doubleValue))
    return doubleValue;       
else if (bool.TryParse(str, out boolValue))
    return boolValue;
else if (DateTime.TryParse(str, out dateValue))
    return dateValue;
else return str;

}

Edit: I only need to cater for the following:

BIT  
DATETIME  
INT  
NVARCHAR(255)  
NVARCHAR(MAX)  
BIGINT  
DECIMAL(36, 17)  

Can you see any possible improvement to the priority?

Answer

callisto picture callisto · Mar 16, 2011

I've come up with the following solution which works:

enum dataType
    {
        System_Boolean = 0,
        System_Int32 = 1,
        System_Int64 = 2,
        System_Double = 3,
        System_DateTime = 4,
        System_String = 5
    }

    private dataType ParseString(string str)
    {

        bool boolValue;
        Int32 intValue;
        Int64 bigintValue;
        double doubleValue;
        DateTime dateValue;

        // Place checks higher in if-else statement to give higher priority to type.

        if (bool.TryParse(str, out boolValue))
            return dataType.System_Boolean;
        else if (Int32.TryParse(str, out intValue))
            return dataType.System_Int32;
        else if (Int64.TryParse(str, out bigintValue))
            return dataType.System_Int64;
        else if (double.TryParse(str, out doubleValue))
            return dataType.System_Double;
        else if (DateTime.TryParse(str, out dateValue))
            return dataType.System_DateTime;
        else return dataType.System_String;

    }


    /// <summary>
    /// Gets the datatype for the Datacolumn column
    /// </summary>
    /// <param name="column">Datacolumn to get datatype of</param>
    /// <param name="dt">DataTable to get datatype from</param>
    /// <param name="colSize">ref value to return size for string type</param>
    /// <returns></returns>
    public Type GetColumnType(DataColumn column, DataTable dt, ref int colSize)
    {

        Type T;
        DataView dv = new DataView(dt);
        //get smallest and largest values
        string colName = column.ColumnName;

        dv.RowFilter = "[" + colName + "] = MIN([" + colName + "])";
        DataTable dtRange = dv.ToTable();
        string strMinValue = dtRange.Rows[0][column.ColumnName].ToString();
        int minValueLevel = (int)ParseString(strMinValue);

        dv.RowFilter = "[" + colName + "] = MAX([" + colName + "])";
        dtRange = dv.ToTable();
        string strMaxValue = dtRange.Rows[0][column.ColumnName].ToString();
        int maxValueLevel = (int)ParseString(strMaxValue);
        colSize = strMaxValue.Length;

        //get max typelevel of first n to 50 rows
        int sampleSize = Math.Max(dt.Rows.Count, 50);
        int maxLevel = Math.Max(minValueLevel, maxValueLevel);

        for (int i = 0; i < sampleSize; i++)
        {
            maxLevel = Math.Max((int)ParseString(dt.Rows[i][column].ToString()), maxLevel);
        }

        string enumCheck = ((dataType)maxLevel).ToString();
        T = Type.GetType(enumCheck.Replace('_', '.'));

        //if typelevel = int32 check for bit only data & cast to bool
        if (maxLevel == 1 && Convert.ToInt32(strMinValue) == 0 && Convert.ToInt32(strMaxValue) == 1)
        {
            T = Type.GetType("System.Boolean");
        }

        if (maxLevel != 5) colSize = -1;


        return T;
    }