1
0
mirror of https://github.com/meineerde/rackstash.git synced 2026-01-31 17:27:13 +00:00

Add Rack middleware

The middleware can be used in a Rack appliction wrap all log messages
emitted to the logger during a single request in a single emitted log
event. This ensures that all data concerning the request, including log
messages as well as additional fields and tags are logged as one single
event.

This ensures that the data is kept as a whole when the log event is
handled by later systems like Logstash. Each request can be analyzed as
a whole without having to group or parse complex multi-line log formats.
This commit is contained in:
Holger Just 2017-10-17 23:46:12 +02:00
parent cb95c0b0e4
commit c58c70febb
6 changed files with 624 additions and 0 deletions

View File

@ -29,6 +29,11 @@ matrix:
- rvm: 2.4.2
env: COVERAGE=1
# An older Ruby/Rack combination as used by Rails 3.2. As long as it works
# with our required Ruby version, we want to support it.
- rvm: 2.1.10
env: RACK_VERSION=1.4.5
allow_failures:
- rvm: ruby-head
- rvm: jruby-head

View File

@ -8,3 +8,11 @@
source 'https://rubygems.org'
gemspec name: 'rackstash'
group :test do
if ENV['RACK_VERSION']
gem 'rack', "~> #{ENV['RACK_VERSION']}"
else
gem 'rack'
end
end

View File

@ -129,6 +129,12 @@ module Rackstash
FIELD_ERROR_MESSAGE = 'error_message'.freeze
FIELD_ERROR_TRACE = 'error_trace'.freeze
FIELD_DURATION = 'duration'.freeze
FIELD_METHOD = 'method'.freeze
FIELD_PATH = 'path'.freeze
FIELD_SCHEME = 'scheme'.freeze
FIELD_STATUS = 'status'.freeze
# Returns a {Flow} which is used by the normal logger {Flow}s to write details
# about any unexpected errors during interaction with their {Adapters}.
#

21
lib/rackstash/rack.rb Normal file
View File

@ -0,0 +1,21 @@
# frozen_string_literal: true
# Copyright 2016 Holger Just
#
# This software may be modified and distributed under the terms
# of the MIT license. See the LICENSE.txt file for details.
require 'rack'
module Rackstash
# This module contains the integration classes into
# [Rack](https://github.com/rack/rack), the generic webserver interface for
# Ruby frameworks.
#
# Here, we provide a very basic integration. You can use it as a building
# block for more specific integrations into frameworks like Hanami, Rails, or
# Sinatra.
module Rack
end
end
require 'rackstash/rack/middleware'

View File

@ -0,0 +1,334 @@
# frozen_string_literal: true
# Copyright 2016 Holger Just
#
# This software may be modified and distributed under the terms
# of the MIT license. See the LICENSE.txt file for details.
require 'rack'
require 'rackstash/helpers/time'
module Rackstash
module Rack
# The Rack logging middleware provides a way to log structured data for each
# request passing through to the Rack application.
#
# The middleware works as a combination of the `::Rack::Logger` and
# especially the `::Rack::CommonLogger` middlewares shipped with Rack. For
# each request, we open a new log environment by pushing a new {Buffer} to
# the current Thread's stack. All log messages as well as any fields logged
# to the {#logger} will be added to this buffer. After the request has
# ended, we pop the buffer from the stack again and flush all logged data to
# the configured log flows of the given {Rackstash::Logger} object.
#
# By default, we log the following fields about each request:
#
# * `"method"` - the HTTP verb, e.g. `"GET"`, `"POST"`, or "`PUT`"
# * `"path"` - the request path, e.g. `"/desserts/cake?cream=1&cheese=0"`
# * `"status"` - the numeric HTTP response code, e.g. `200`, or `500`
# * `"duration"` - the duration in seconds with fractions between first
# receiving the request and its eventual delivery to the client by the
# application server, e.g. `0.295372`. This time equals the duration
# logged by the `::Rack::CommonLogger` middleware which ships with Rack.
#
# In addition to that, we can also log additional data from the request or
# the response. For that, you can set additional (optionally lazy-evaluated)
# fields and tags.
#
# In a Rack builder, you can define the middleware as follows
#
# # First, create the logger however you like. You can use multiple
# # flows, add filters, ...
# logger = Rackstash::Logger.new(STDOUT)
#
# use Rackstash::Rack::Middleware,
# logger,
# buffering: :full,
# request_fields: {
# 'user_agent' => ->(request) { request.headers['user-agent'] },
# 'remote_ip' => ->(request) { request.ip },
# 'server' => Socket.gethostname
# },
# request_tags: ['rack', ->(request) { request.scheme }],
# response_fields: ->(headers) {
# { 'location' => headers['location'] }
# },
# response_tags: ->(headers) { ["cache_status_#{headers[x-rack-cache]}"] }
#
# Here, we instruct the middleware to log each request to the provided
# `logger`. For each request, we will by default emit a single log event to
# each flow of the logger. You can customize this by setting the `buffering`
# argument. See {Buffer#buffering} for a description of the values. During
# development, it might be useful to set this to `:data` to emit log events
# directly for each logged message instead of only once after the request
# was finished.
#
# In the log event(s) emitted for the request, we add the default fields
# described above, plus some data extracted from the HTTP request data. For
# dynamic values, we can use the `::Rack::Request` object for the current
# request. In our example above, we add the user agent header of the
# client's webbrowser and the client's IP address. We also set a `"server"`
# field containing the server's hostname. We are also setting some tags, in
# this case we set the static tag `"rack"` and a tag for the current
# request's scheme. Usually, this is either `"http"` or `"https"`.
#
# For the response, we have the same flexibility. Here, we can use the hash
# containing the response headers to set additional fields or tags to the
# log buffer. In our example above, we add the value of the `Location`
# header of the response as a field and add a tag based on the status of the
# Rack::Cache middleware our app might be using.
#
# Note that all fields set by the middleware are deep-merged into the
# Buffer's current fields using {Fields::Hash#deep_merge!}. All existing
# fields set by the application will be preserved and are not overwritten.
#
# In case there is an unhandled exception during request processing in the
# Rack aplication or any lower middleware, we log this error using
# {Rackstash::Logger#add_exception}. The emitted event will still contain
# all added request fields and tags as well as the `"status"` field (set to
# `500`) and the duration of the request so far.
class Middleware
include Rackstash::Helpers::Time
# @return [Rackstash::Logger] the Rackstash logger used to log the
# request details
attr_reader :logger
# @return [Hash<#to_s => Object>, Proc] a `Hash` specifying fields to be
# logged for each request. The fields will be added to the log {Buffer}
# before the request is passed to the Rack application. You can either
# give a literal `Hash` object or a `Proc` which returns such a `Hash`.
#
# When including `Proc`s, they are evaluated with the `Rack::Request`
# object as a scope. See {Rackstash::Fields::Hash#deep_merge!}.
attr_accessor :request_fields
# @return [Array<#to_s, Proc>, Proc] Strings to add as tags to the logged
# event. The fields will be added to the log {Buffer} before the request
# is passed to the Rack application. You can either give (`Array`s of)
# `String`s here or `Proc`s which return a `String` or an `Array` of
# `String`s when called. When including `Proc`s, they are evaluated with
# the `Rack::Request` object as a scope.
# See {Rackstash::Fields::Tags#merge!}.
attr_accessor :request_tags
# @return [Hash<#to_s => Object>, Proc] a `Hash` specifying fields to be
# logged for each response. The fields will be added to the log {Buffer}
# after the response was successfully returned by the Rack application
# and no unhandled exception was thrown. You can either give a literal
# `Hash` object or a `Proc` which returns such a `Hash`.
#
# When including `Proc`s, they are evaluated with the response headers
# in a `::Rack::Utils::HeaderHash` object as a scope. See
# {Rackstash::Fields::Hash#deep_merge!}.
attr_accessor :response_fields
# @return [Array<#to_s, Proc>, Proc] Strings to add as tags to the logged
# event for each response. The fields will be added to the log {Buffer}
# after the response was successfully returned by the Rack application
# and no unhandled exception was thrown. You can either give (`Array`s
# of) `String`s here or `Proc`s which return a `String` or an `Array` of
# Strings when called. When including `Proc`s, they are evaluated with
# the response headers in a `::Rack::Utils::HeaderHash` object as a
# scope.
# See {Rackstash::Fields::Tags#merge!}.
attr_accessor :response_tags
# @param app [#call] A Rack application according to the Rack
# specification. A Rack application is a Ruby object (usually not a
# class) that responds to `call`. It takes exactly one argument, the
# `environment` and returns an `Array` of exactly three values: the
# `status`, the `headers`, and the `body`.
# @param logger [Rackstash::Logger] the {Rackstash::Logger} instance to
# log each request to
# @param buffering [Symbol, Boolean] define how the created {Buffer}s
# should buffer stored data. See {Buffer#buffering} for details.
# @param request_fields [Hash<#to_s, => Proc, Object>, Fields::Hash, Proc]
# Additional fields to merge into the emitted log event before
# processing the request. If the object itself or any of its hash values
# is a `Proc`, it will get called, passing the `Rack::Request` object
# for the current request as an argument, and its result is used
# instead.
# @param request_tags [Array<#to_s, Proc>, Set<#to_s, Proc>, Proc] an
# `Array` specifying default tags for each request. You can either give
# a literal `Array` containing Strings or a `Proc` which returns such an
# `Array`. If the object itself or any of its values is a `Proc`, it is
# called, passing the `Rack::Request` object for the current request
# as an argument, and its result is used instead.
# @param response_fields [Hash<#to_s, => Proc, Object>, Fields::Hash, Proc]
# Additional fields to merge into the emitted log event after processing
# the request and sending the complete response. If the object itself
# or any of its hash values is a `Proc`, it will get called, passing
# the `Hash` of response headers of the current response as an argument,
# and its result is used instead.
# @param response_tags [Array<#to_s, Proc>, Set<#to_s, Proc>, Proc] an
# `Array` specifying default tags for each returned response. You can
# either give a literal `Array` containing Strings or a `Proc` which
# returns such an `Array`. If the object itself or any of its values is
# a `Proc`, it is called, passing the `Hash` of response headers of the
# current response as an argument, and its result is used instead.
# @raise [TypeError] if the passed `logger` is not a {Rackstash::Logger}
def initialize(
app, logger, buffering: :full,
request_fields: nil, request_tags: nil,
response_fields: nil, response_tags: nil
)
unless logger.is_a?(Rackstash::Logger)
raise TypeError, 'logger must be a Rackstash::Logger'
end
@app = app
@logger = logger
@buffering = buffering
@request_fields = request_fields
@response_fields = response_fields
@request_tags = request_tags
@response_tags = response_tags
end
# Push a new {Buffer} to the stack and run the Rack app for the request.
# All information logged by the Rack app during the request will be added
# to this {Buffer} which will be flushed after the request returned.
#
# @param env [Hash] the Rack environment Hash
# @return [Array] the three-element array containing the numeric response
# status, the response headers and the body according to the Rack
# specification.
def call(env)
began_at = clock_time
env['rackstash.logger'.freeze] = @logger
env['rack.logger'.freeze] = @logger
@logger.push_buffer(buffering: @buffering, allow_silent: true)
begin
@logger.timestamp
on_request(env)
response = @app.call(env)
rescue Exception => exception
buffer = @logger.pop_buffer
if buffer
begin
on_error(buffer, env, began_at, exception)
ensure
buffer.flush
end
end
raise
end
buffer = @logger.pop_buffer
on_response(buffer, env, began_at, response) if buffer
response
end
private
# @param env [Hash] The Rack environment
# @return [void]
def on_request(env)
log_request(env)
end
# (see #on_request)
def log_request(env)
request = ::Rack::Request.new(env)
@logger.fields[FIELD_METHOD] = request.request_method
@logger.fields[FIELD_PATH] = request.fullpath
@logger.fields.deep_merge!(
@request_fields,
scope: request,
force: false
) unless @request_fields.nil?
@logger.tag(@request_tags, scope: request) unless @request_tags.nil?
end
# @param buffer [Rackstash::Buffer] the log {Buffer} which was captured
# the logs during the request in the lower layers.
# @param env [Hash] the Rack environment
# @param began_at [Float] a timestamp denoting the start of the request.
# It can be used to get a request duration by subtracting it from
# {#clock_time}.
# @param response [Array] a three-element array containing the Rack
# response. It consists of the numeric `status` code, a Hash containing
# the response `headers`, and the response `body`.
#
# @return [void]
def on_response(buffer, env, began_at, response)
response[1] = ::Rack::Utils::HeaderHash.new(response[1])
response[2] = ::Rack::BodyProxy.new(response[2]) do
begin
log_response(buffer, env, began_at, response[0], response[1])
rescue Exception => exception
on_error(buffer, env, began_at, exception)
raise
ensure
buffer.flush
end
end
response
end
# Log data from the response to the buffer for the request.
#
# Note that we are directly writing to the `buffer` here, not the
# `@logger`. This is necessary because `log_response` is called very late
# during response processing by the application server where the buffer is
# already poped from the stack.
#
# @param buffer (see #on_response)
# @param env (see #on_response)
# @param began_at (see #on_response)
# @param status [Integer] the numeric response status
# @param headers [::Rack::Utils::HeaderHash] the response headers
#
# @return [void]
def log_response(buffer, env, began_at, status, headers)
fields = {
FIELD_STATUS => ::Rack::Utils.status_code(status),
FIELD_DURATION => (clock_time - began_at).round(ISO8601_PRECISION)
}
buffer.fields.merge!(fields, force: false)
buffer.fields.deep_merge!(
@response_fields,
scope: headers,
force: false
) unless @response_fields.nil?
buffer.tag(@response_tags, scope: headers) unless @response_tags.nil?
end
# @param buffer [Rackstash::Buffer] the log {Buffer} which was captured
# the logs during the request in the lower layers.
# @param env [Hash] The Rack environment
# @param began_at [Float] a timestamp denoting the start of the request.
# It can be used to get a request duration by subtracting it from
# {#clock_time}.
# @param exception [Exception] a rescued exception
#
# @return [void]
def on_error(buffer, env, began_at, exception)
log_error(buffer, env, began_at, exception)
end
# (see #on_error)
def log_error(buffer, env, began_at, exception)
buffer.add_exception(exception, force: false)
# Always set the status to 500, even if the app returned a successful
# status. This is necessary to reflect the true status upsteam in case
# there was an exception on adding request fields
buffer.fields[FIELD_STATUS] = 500
buffer.fields.set(FIELD_DURATION, force: false) {
(clock_time - began_at).round(ISO8601_PRECISION)
}
end
end
end
end

View File

@ -0,0 +1,250 @@
# frozen_string_literal: true
#
# Copyright 2017 Holger Just
#
# This software may be modified and distributed under the terms
# of the MIT license. See the LICENSE.txt file for details.
require 'spec_helper'
require 'rackstash/rack/middleware'
describe Rackstash::Rack::Middleware do
let(:app) {
lambda { |env|
logger = env['rack.logger']
logger.debug('Request started')
logger.warn('Nothing to do...')
env['process'].call(env) if env['process']
# Raise a RuntimeError if a raise parameter was set in the request
request = ::Rack::Request.new(env)
raise RuntimeError, request.params['raise'] if request.params['raise']
[200, {'Content-Type' => 'text/plain'}, ["Hello, World!"]]
}
}
let(:log) { [] }
let(:logger) { Rackstash::Logger.new ->(event) { log << event } }
let(:args) { {} }
let(:stack) { described_class.new(app, logger, **args)}
def get(path, opts = {})
::Rack::MockRequest.new(::Rack::Lint.new(stack)).get(path, opts)
end
it 'requires a logger' do
# missing logger
expect { described_class.new app }.to raise_error ArgumentError
# unsupported logger
expect { described_class.new app, ::Logger.new(STDOUT) }.to raise_error TypeError
expect { described_class.new app, nil }.to raise_error TypeError
expect { described_class.new app, STDOUT }.to raise_error TypeError
end
it 'sets Buffer#buffering' do
args[:buffering] = :data
get('/stuff')
expect(log).to match [
include('message' => "Request started\n", 'method' => 'GET', 'path' => '/stuff'),
include('message' => "Nothing to do...\n", 'method' => 'GET', 'path' => '/stuff'),
include('message' => '', 'status' => 200)
]
end
it 'creates a new log scope' do
2.times do
expect(logger).to receive(:push_buffer).ordered.and_call_original
expect(logger).to receive(:pop_buffer).ordered.and_call_original
end
get('/foo')
get('/bar')
end
it 'sets rack.logger environment variable' do
called = false
app = lambda do |env|
called = true
expect(env['rack.logger']).to equal logger
[200, {'Content-Type' => 'text/plain'}, ["Hello, World!"]]
end
::Rack::MockRequest.new(described_class.new(app, logger)).get('/')
expect(called).to be true
end
it 'sets rackstash.logger environment variable' do
called = false
app = lambda do |env|
called = true
expect(env['rackstash.logger']).to equal logger
[200, {'Content-Type' => 'text/plain'}, ["Hello, World!"]]
end
::Rack::MockRequest.new(described_class.new(app, logger)).get('/')
expect(called).to be true
end
it 'logs basic request data' do
get('/demo')
expect(log.last).to match({
'method' => 'GET',
'path' => '/demo',
'status' => 200,
'duration' => be_a(Float).and(be > 0),
'message' => "Request started\nNothing to do...\n",
'@timestamp' => /\d{4}-\d\d-\d\dT\d\d:\d\d:\d\d.\d{6}Z/,
'tags' => []
})
end
context 'with request_fields' do
it 'logs additional request fields' do
args[:request_fields] = lambda { |request|
{
'do' => request.request_method.downcase,
:massive => -> { fullpath.upcase }
}
}
get('/stuff')
expect(log.last).to include(
'do' => 'get',
'massive' => '/STUFF',
'path' => '/stuff',
'method' => 'GET'
)
end
end
context 'with request_tags' do
it 'logs additional tags' do
args[:request_tags] = lambda { |request|
[[:foo], request.fullpath.upcase[/\w+/]]
}
get('/stuff')
expect(log.last).to include('tags' => ['foo', 'STUFF'])
end
end
context 'with response fields' do
it 'logs additional fields' do
args[:response_fields] = {
:content_type => ->(headers) { headers['Content-Type'] },
'static' => 'value'
}
get('/stuff')
expect(log.last).to include(
'path' => '/stuff',
'method' => 'GET',
'content_type' => 'text/plain',
'static' => 'value'
)
end
it 'retains existing fields' do
args[:response_fields] = { 'path' => 'foo' }
get('/stuff')
expect(log.last).to include 'path' => '/stuff'
end
end
context 'with response_tags' do
it 'logs additional tags' do
args[:response_tags] = lambda { |headers|
[[:foo], headers['Content-Type'][/\w+/]]
}
get('/stuff')
expect(log.last).to include('tags' => ['foo', 'text'])
end
end
describe 'on errors' do
it 'logs errors' do
expect { get('/error', params: { raise: 'Oh noes!' }) }
.to raise_error(RuntimeError, 'Oh noes!')
expect(log.last).to include(
'error' => 'RuntimeError',
'error_message' => 'Oh noes!',
'error_trace' => %r{\A#{__FILE__}:24:in},
)
end
it 'sets the status to 500' do
expect { get('/error', params: { raise: 'bum' }) }.to raise_error('bum')
expect(log.last).to include 'status' => 500
end
it 'always logs request params first' do
expect { get('/error', params: { raise: 'bum' }) }.to raise_error('bum')
expect(log.last).to include(
'method' => 'GET',
'path' => '/error?raise=bum'
)
end
it 'handles errors on setting request_fields' do
args[:request_fields] = -> {
{
'foo' => 'bar',
'error' => -> { raise 'kaputt' }
}
}
expect { get('/normal') }.to raise_error('kaputt')
expect(log.last).to include(
'error' => 'RuntimeError',
'error_message' => 'kaputt',
'error_trace' => %r{\A#{__FILE__}:#{__LINE__ - 8}:in},
# The message is empty since we never even called the app.
'message' => '',
'status' => 500
)
# None of the response fields are set if normalization fails
expect(log.last).to_not include 'foo'
end
it 'handles errors on setting response_fields' do
args[:response_fields] = -> {
{
'foo' => 'bar',
'error' => -> { raise 'kaputt' }
}
}
expect { get('/normal') }.to raise_error('kaputt')
expect(log.last).to include(
'error' => 'RuntimeError',
'error_message' => 'kaputt',
'error_trace' => %r{\A#{__FILE__}:#{__LINE__ - 8}:in},
# The app did its thing
'message' => "Request started\nNothing to do...\n",
# We explicitly override the logged status, even if the app returned a
# successful response earlier
'status' => 500
)
# None of the response fields are set if normalization fails
expect(log.last).to_not include 'foo'
end
end
end