Using BrowserSession and HtmlAgilityPack to login to Facebook through .NET

Karsa Olong picture Karsa Olong · Aug 12, 2010 · Viewed 20k times · Source

I'm trying to use Rohit Agarwal's BrowserSession class together with HtmlAgilityPack to login to and subsequently navigate around Facebook.

I've previously managed doing the same by writing my own HttpWebRequest's. However, it then only works when I manually fetch the cookie from my browser and insert a fresh cookie-string to the request each time I'm doing a new "session". Now I'm trying to use BrowserSession to get smarter navigation.

Here's the current code:

BrowserSession b = new BrowserSession();

b.Get(@"http://www.facebook.com/login.php");
b.FormElements["email"] = "[email protected]";
b.FormElements["pass"] = "xxxxxxxx";
b.FormElements["lsd"] = "qDhIH";
b.FormElements["trynum"] = "1";
b.FormElements["persistent_inputcheckbox"] = "1";

var response = b.Post(@"https://login.facebook.com/login.php?login_attempt=1");

The above works fine. Trouble comes when I try to use this BrowserSession again to fetch another page. I'm doing it this way since BrowserSession saves the cookies from the last response and inserts them into the next request, thus I should not have to manually inser cookiedata fetched from my browser anymore.

However, when I try to do something like this:

var profilePage = b.Get(@"https://m.facebook.com/profile.php?id=1111111111");

the doc I get back is empty. I would appreciate any input on what I'm doing wrong.

Answer

Nathan picture Nathan · Nov 4, 2012

I fixed the root cause of this if anyone cares. It turns out the cookies were being saved in the CookieContainer of the REQUEST object and not the response object. I also added the ability to download a file (provided that file is string based). Code definitely is NOT thread-safe, but the object wasn't thread-safe to begin with:

public class BrowserSession
{
    private bool _isPost;
    private bool _isDownload;
    private HtmlDocument _htmlDoc;
    private string _download;

    /// <summary>
    /// System.Net.CookieCollection. Provides a collection container for instances of Cookie class 
    /// </summary>
    public CookieCollection Cookies { get; set; }

    /// <summary>
    /// Provide a key-value-pair collection of form elements 
    /// </summary>
    public FormElementCollection FormElements { get; set; }

    /// <summary>
    /// Makes a HTTP GET request to the given URL
    /// </summary>
    public string Get(string url)
    {
        _isPost = false;
        CreateWebRequestObject().Load(url);
        return _htmlDoc.DocumentNode.InnerHtml;
    }

    /// <summary>
    /// Makes a HTTP POST request to the given URL
    /// </summary>
    public string Post(string url)
    {
        _isPost = true;
        CreateWebRequestObject().Load(url, "POST");
        return _htmlDoc.DocumentNode.InnerHtml;
    }

    public string GetDownload(string url)
    {
        _isPost = false;
        _isDownload = true;
        CreateWebRequestObject().Load(url);
        return _download;
    }

    /// <summary>
    /// Creates the HtmlWeb object and initializes all event handlers. 
    /// </summary>
    private HtmlWeb CreateWebRequestObject()
    {
        HtmlWeb web = new HtmlWeb();
        web.UseCookies = true;
        web.PreRequest = new HtmlWeb.PreRequestHandler(OnPreRequest);
        web.PostResponse = new HtmlWeb.PostResponseHandler(OnAfterResponse);
        web.PreHandleDocument = new HtmlWeb.PreHandleDocumentHandler(OnPreHandleDocument);
        return web;
    }

    /// <summary>
    /// Event handler for HtmlWeb.PreRequestHandler. Occurs before an HTTP request is executed.
    /// </summary>
    protected bool OnPreRequest(HttpWebRequest request)
    {
        AddCookiesTo(request);               // Add cookies that were saved from previous requests
        if (_isPost) AddPostDataTo(request); // We only need to add post data on a POST request
        return true;
    }

    /// <summary>
    /// Event handler for HtmlWeb.PostResponseHandler. Occurs after a HTTP response is received
    /// </summary>
    protected void OnAfterResponse(HttpWebRequest request, HttpWebResponse response)
    {
        SaveCookiesFrom(request, response); // Save cookies for subsequent requests

        if (response != null && _isDownload)
        {
            Stream remoteStream = response.GetResponseStream();
            var sr = new StreamReader(remoteStream);
            _download = sr.ReadToEnd();
        }
    }

    /// <summary>
    /// Event handler for HtmlWeb.PreHandleDocumentHandler. Occurs before a HTML document is handled
    /// </summary>
    protected void OnPreHandleDocument(HtmlDocument document)
    {
        SaveHtmlDocument(document);
    }

    /// <summary>
    /// Assembles the Post data and attaches to the request object
    /// </summary>
    private void AddPostDataTo(HttpWebRequest request)
    {
        string payload = FormElements.AssemblePostPayload();
        byte[] buff = Encoding.UTF8.GetBytes(payload.ToCharArray());
        request.ContentLength = buff.Length;
        request.ContentType = "application/x-www-form-urlencoded";
        System.IO.Stream reqStream = request.GetRequestStream();
        reqStream.Write(buff, 0, buff.Length);
    }

    /// <summary>
    /// Add cookies to the request object
    /// </summary>
    private void AddCookiesTo(HttpWebRequest request)
    {
        if (Cookies != null && Cookies.Count > 0)
        {
            request.CookieContainer.Add(Cookies);
        }
    }

    /// <summary>
    /// Saves cookies from the response object to the local CookieCollection object
    /// </summary>
    private void SaveCookiesFrom(HttpWebRequest request, HttpWebResponse response)
    {
        //save the cookies ;)
        if (request.CookieContainer.Count > 0 || response.Cookies.Count > 0)
        {
            if (Cookies == null)
            {
                Cookies = new CookieCollection();
            }

            Cookies.Add(request.CookieContainer.GetCookies(request.RequestUri));
            Cookies.Add(response.Cookies);
        }
    }

    /// <summary>
    /// Saves the form elements collection by parsing the HTML document
    /// </summary>
    private void SaveHtmlDocument(HtmlDocument document)
    {
        _htmlDoc = document;
        FormElements = new FormElementCollection(_htmlDoc);
    }
}

/// <summary>
/// Represents a combined list and collection of Form Elements.
/// </summary>
public class FormElementCollection : Dictionary<string, string>
{
    /// <summary>
    /// Constructor. Parses the HtmlDocument to get all form input elements. 
    /// </summary>
    public FormElementCollection(HtmlDocument htmlDoc)
    {
        var inputs = htmlDoc.DocumentNode.Descendants("input");
        foreach (var element in inputs)
        {
            string name = element.GetAttributeValue("name", "undefined");
            string value = element.GetAttributeValue("value", "");

            if (!this.ContainsKey(name))
            {
                if (!name.Equals("undefined"))
                {
                    Add(name, value);
                }
            }
        }
    }

    /// <summary>
    /// Assembles all form elements and values to POST. Also html encodes the values.  
    /// </summary>
    public string AssemblePostPayload()
    {
        StringBuilder sb = new StringBuilder();
        foreach (var element in this)
        {
            string value = System.Web.HttpUtility.UrlEncode(element.Value);
            sb.Append("&" + element.Key + "=" + value);
        }
        return sb.ToString().Substring(1);
    }
}