How can I integrate Tomcat6's catalina.out file with Logstash + ElasticSearch + Kibana?

Mr_Thumb picture Mr_Thumb · Aug 21, 2014 · Viewed 17.7k times · Source

I want to have a centralized logging server.

I have created two Ubuntu 12.04 LTS servers based on this iso.

After successfully and strictly following the steps of this tutorial, I have:

  1. One Logging Server with Logstash + ElasticSearch + Kibana.
  2. And one Application Server with a Logstash-Forwarder, Tomcat 6 and another app, whose logs are written to the catalina.out file.

My question is:

  • What 'for-dummies' steps should I follow now in order to send catalina.out logs from the App Server to the Logging Server in such a way that those logs are properly structured and query-able in ElasticSearch + Kibana?

Keep in mind that catalina.out does not have a single uniform format, and that multi-line Java exceptions are going to be logged as well.

I have read some tutorials regarding this, but I feel they weren't sufficiently well-explained for a dummy like me: they lacked examples of full paths to files, appropriate file names, required Ubuntu commands, and so on.

Thanks in advance!

Answer

Tony BenBrahim picture Tony BenBrahim · Aug 26, 2014

Here is what I have. Note that the access logs use a custom log format (documented below), and that I extract a bit more information out of the Tomcat logs (it is useful to have logLevel as a field, for example):

input {
    # Access logs: every *.txt file in the Tomcat logs directory becomes an
    # event of type "access-log" (one event per line).
    file {
        path => [ "C:/apache-tomcat-6.0.18/logs/*.txt" ]
        type => "access-log"
    }

    # Tomcat logs: every *.log file in the same directory becomes an event of
    # type "tomcat".
    file {
        path => [ "C:/apache-tomcat-6.0.18/logs/*.log" ]
        type => "tomcat"

        # A new event starts on a line that begins with a timestamp such as
        # "Aug 25, 2014 11:23:31 AM". Because negate is true, any line that
        # does NOT match the pattern is appended to the previous event, which
        # keeps multi-line entries (e.g. Java stack traces) in one event.
        codec => multiline {
            pattern => "(^%{MONTH} %{MONTHDAY}, 20%{YEAR} %{HOUR}:?%{MINUTE}(?::?%{SECOND}) (?:AM|PM))"
            negate => true
            what => "previous"
        }
    }
}

filter {
    # ---------------------------------------------------------------------
    # Access-log events
    # ---------------------------------------------------------------------
    if [type] == "access-log" {
        # Split the raw line into fields, then discard the raw line.
        grok {
            # Access log pattern is %a %{waffle.servlet.NegotiateSecurityFilter.PRINCIPAL}s %t %m %U%q %s %B %T "%{Referer}i" "%{User-Agent}i"
            match => [ "message" , "%{IPV4:clientIP} %{NOTSPACE:user} \[%{DATA:timestamp}\] %{WORD:method} %{NOTSPACE:request} %{NUMBER:status} %{NUMBER:bytesSent} %{NUMBER:duration} \"%{NOTSPACE:referer}\" \"%{DATA:userAgent}\"" ]
            remove_field => [ "message" ]
        }
        # Take the first path segment of the request as the application name.
        # tag_on_failure is emptied so that requests without such a segment
        # are not tagged as grok failures.
        grok{
            match => [ "request", "/%{USERNAME:app}/" ]
            tag_on_failure => [ ]
        }
        # Parse the access-log timestamp and drop the now-redundant text field.
        date {
            match => [ "timestamp", "dd/MMM/YYYY:HH:mm:ss Z" ]
            remove_field => [ "timestamp" ]
        }
        # geoip runs before dns so that it sees the raw IP address.
        geoip {
            source => ["clientIP"]
        }
        dns {
            reverse => [ "clientIP" ]
        }
        mutate {
            lowercase => [ "user" ]
            convert => [ "bytesSent", "integer", "duration", "float" ]
        }
        # "-" is the access log's placeholder for "no value"; remove such
        # fields instead of indexing a literal dash.
        if [referer] == "-" {
            mutate {
                remove_field => [ "referer" ]
            }
        }
        if [user] == "-" {
            mutate {
                remove_field => [ "user" ]
            }
        }
    }

    # ---------------------------------------------------------------------
    # Tomcat log events (already joined into multi-line events by the input)
    # ---------------------------------------------------------------------
    if [type] == "tomcat" {
        # Discard events whose message contains no characters at all.
        if [message] !~ /(.+)/  {
            drop { }
        }
        # First line:  <timestamp> <class> <method>
        # Second line: <LEVEL>: <text>
        # The line break is matched with \r?\n so that both CRLF (Windows) and
        # LF (Linux) log files are handled.
        # Grok references must be written %{SYNTAX:SEMANTIC} with no space
        # after the colon; with a space the reference is not expanded and the
        # field is never extracted.
        # NOTE(review): CATALINA_DATESTAMP must be resolvable, either from the
        # bundled patterns or from a file in patterns_dir - confirm for your
        # Logstash version.
        grok{
             patterns_dir => "./patterns"
             match => [ "message", "%{CATALINA_DATESTAMP:timestamp} %{NOTSPACE:className} %{WORD:methodName}\r?\n%{LOGLEVEL:logLevel}: %{GREEDYDATA:message}" ]
             overwrite => [ "message" ]
        }
        # Derive the application name from the log file name
        # (<app>.20<rest of date>.log). No failure tag is added when the file
        # name does not follow that scheme.
        grok{
            match => [ "path", "/%{USERNAME:app}.20%{NOTSPACE}.log"]
            tag_on_failure => [ ]
        }
        #Aug 25, 2014 11:23:31 AM
        date{
            match => [ "timestamp", "MMM dd, YYYY hh:mm:ss a" ]
            remove_field => [ "timestamp" ]
        }
    }
}

output {
    # Send every event to Elasticsearch. `somehost` stands in for the host
    # name of the Elasticsearch node.
    elasticsearch {
        host => somehost
    }
}