diff --git a/lib/rackstash.rb b/lib/rackstash.rb index 8715425..90544bd 100644 --- a/lib/rackstash.rb +++ b/lib/rackstash.rb @@ -173,6 +173,7 @@ require 'rackstash/adapter/logger' require 'rackstash/adapter/io' require 'rackstash/adapter/null' +require 'rackstash/encoder/gelf' require 'rackstash/encoder/hash' require 'rackstash/encoder/json' require 'rackstash/encoder/lograge' diff --git a/lib/rackstash/encoder/gelf.rb b/lib/rackstash/encoder/gelf.rb new file mode 100644 index 0000000..80e2968 --- /dev/null +++ b/lib/rackstash/encoder/gelf.rb @@ -0,0 +1,206 @@ +# frozen_string_literal: true +# +# Copyright 2017 Holger Just +# +# This software may be modified and distributed under the terms +# of the MIT license. See the LICENSE.txt file for details. + +require 'date' +require 'socket' +require 'time' + +require 'rackstash/encoder' +require 'rackstash/encoder/helper/fields_map' +require 'rackstash/encoder/helper/message' +require 'rackstash/helpers/utf8' + +module Rackstash + module Encoder + # The Graylog Extended Log Format (GELF) is the native format used by + # [Graylog](https://www.graylog.org). By formatting Rackstash's log events + # as GELF, you can directly send thm to a Graylog server for storage and + # further processing. + # + # GELF is based in JSON with some additional restrictions. Please see + # [the specification](http://docs.graylog.org/en/2.4/pages/gelf.html) for + # details. The GELF encoder returns the log event as a JSON-encoded `String` + # without any literal newline characters. + # + # To send Rackstash log events to a Graylog server, you can use the + # {Adapter::GELF} adapter to send the formatted GELF payload to your Graylog + # server using any of the supported transport protocols. + # + # @see http://docs.graylog.org/en/2.4/pages/gelf.html + class GELF + include Rackstash::Encoder::Helper::FieldsMap + include Rackstash::Encoder::Helper::Message + include Rackstash::Helpers::UTF8 + + # The default mapping of GELF fields (the keys) to fields in the final + # Rackstash event hash (the value). You can overwrite this mapping by + # setting the `fields` parameter in {#initialize}. + DEFAULT_FIELDS = { + host: nil, # local hostname by default + level: nil, # highest severity of an event message mapped to a syslog level + short_message: FIELD_MESSAGE, + # omitted by default + long_message: nil, + + # The event's timestamp + timestamp: FIELD_TIMESTAMP + }.freeze + + # Mapping of Rackstash log severities to the syslog levels used by GELF + GELF_LEVELS = { + DEBUG => 7, # Debug + INFO => 6, # Informational + WARN => 5, # Notice + ERROR => 4, # Warning + FATAL => 3, # Error + UNKNOWN => 1 # Alert – shouldn't be used + }.freeze + + # @param fields [Hash String, nil>] a mapping of standard fields + # in the emitted GELF message (the Hash keys) to their respective source + # fields in the passed Rackstash event (the values). By default, we use + # the {DEFAULT_FIELDS} mapping which can selectively be overwritten with + # this `fields` argument. All fields in the event Hash which are not + # mapped to one of the main GELF fields will be added as additional + # GELF fields. If the mapped value is `nil`, we do not include the field + # or set it with a default value. + # @param default_severity [Integer] The default log severity. One of the + # {SEVERITIES} constants. If the `level` field of the generated GELF + # message is not overwritten with another field and we can not determine + # a maximum severity from the event's messages, we emit the syslog level + # matching this severity in the `level` field of the generated message. + def initialize(fields: {}, default_severity: UNKNOWN) + set_fields_mapping(fields, DEFAULT_FIELDS) + @default_severity = Rackstash.severity(default_severity) + end + + # Encode the passed event Hash as a JSON string following the GELF + # specification. + # + # @param event [Hash] a log event as produced by the {Flow} + # @return [String] the GELF-formatted event as a single-line JSON string + def encode(event) + gelf = {} + + # > GELF spec version – "1.1"; MUST be set by client library. + gelf['version'] = '1.1'.freeze + + # > the name of the host, source or application that sent this message; + # > MUST be set by client library. + host = extract_field(:host, event) { Socket.gethostname } + gelf['host'] = utf8_encode(host) + + # > Seconds since UNIX epoch with optional decimal places for + # > milliseconds; SHOULD be set by client library. Will be set to the + # > current timestamp (now) by the server if absent. + timestamp = extract_field(:timestamp, event) + gelf['timestamp'] = gelf_timestamp(timestamp) + + # > the level equal to the standard syslog levels; optional, default is + # > `1` (ALERT) + # The default value of 1 corresponds to {UNKNOWN} in Rackstash. + level = extract_field(:level, event) { + GELF_LEVELS.fetch(max_message_severity(event)) { + GELF_LEVELS[@default_severity] + } + } + gelf['level'] = Integer(level) + + # Since we need the raw messages to find the GELF level above, we only + # now normalize the message array to a simple String here + normalize_message(event) + + # > a short descriptive message; MUST be set by client library. + short_message = extract_field(:short_message, event) { EMPTY_STRING } + gelf['short_message'] = utf8_encode(short_message) + + # > a long message that can i.e. contain a backtrace; optional. + # + # Since the field is optional, we only write this field if there is a + # value in our event hash + full_message = extract_field(:full_message, event) + gelf['full_message'] = utf8_encode(full_message) if full_message + + gelf.merge! additional_fields(event) + + ::JSON.dump(gelf) + end + + private + + def gelf_timestamp(timestamp) + time = case timestamp + when Time, DateTime, Date + timestamp.to_time + when String + Time.iso8601(timestamp) rescue Time.now.utc + when Integer, Float + timestamp + else + Time.now.utc + end + + time.to_f + end + + def max_message_severity(event) + messages = event[FIELD_MESSAGE] + return @default_severity unless messages.is_a?(Array) + + max_severity = nil + messages.each do |message| + next unless message.respond_to?(:severity) + severity = message.severity + + next if severity >= UNKNOWN + next if max_severity && severity < max_severity + + max_severity = severity + end + + max_severity || @default_severity + end + + def additional_fields(event) + additional_fields = {} + + event.each_pair do |key, value| + # "_id" is reserved, so use "__id" + key = '_id'.freeze if key == 'id'.freeze + add_additional_field(additional_fields, "_#{key}", value) + end + additional_fields + end + + def add_additional_field(result, key, value) + case value + when ::Hash + value.each_pair do |hash_key, hash_value| + add_additional_field(result, "#{key}.#{hash_key}", hash_value) + end + when ::Array + value.each_with_index do |array_value, index| + add_additional_field(result, "#{key}.#{index}", array_value) + end + when ::Time, ::DateTime + value = value.to_time.getutc + result[sanitize(key)] = value.iso8601(ISO8601_PRECISION) + when ::Date + result[sanitize(key)] = value.iso8601 + else + result[sanitize(key)] = value + end + end + + def sanitize(key) + key.gsub(/[^\w\.\-]/, '_'.freeze) + end + end + + register GELF, :gelf + end +end diff --git a/lib/rackstash/encoder/helper/fields_map.rb b/lib/rackstash/encoder/helper/fields_map.rb new file mode 100644 index 0000000..cd22404 --- /dev/null +++ b/lib/rackstash/encoder/helper/fields_map.rb @@ -0,0 +1,37 @@ +# frozen_string_literal: true +# +# Copyright 2018 Holger Just +# +# This software may be modified and distributed under the terms +# of the MIT license. See the LICENSE.txt file for details. + +require 'rackstash/helpers/utf8' + +module Rackstash + module Encoder + module Helper + # Some useful helper methods for {Rackstash::Encoder}s which help in + # normalizing and handling the message list in the event Hash. + module FieldsMap + include Rackstash::Helpers::UTF8 + + private + + def set_fields_mapping(fields, default = {}) + @fields_map = default.dup + Hash(fields).each_pair do |key, value| + @fields_map[key.to_sym] = utf8_encode(value) + end + end + + def extract_field(name, event) + field_name = @fields_map[name] + + field = event.delete(field_name) if field_name + field = yield(field_name) if field.nil? && block_given? + field + end + end + end + end +end diff --git a/spec/rackstash/encoder/gelf_spec.rb b/spec/rackstash/encoder/gelf_spec.rb new file mode 100644 index 0000000..435b170 --- /dev/null +++ b/spec/rackstash/encoder/gelf_spec.rb @@ -0,0 +1,246 @@ +# frozen_string_literal: true +# +# Copyright 2018 Holger Just +# +# This software may be modified and distributed under the terms +# of the MIT license. See the LICENSE.txt file for details. + +require 'spec_helper' + +require 'rackstash/encoder/gelf' + +RSpec.describe Rackstash::Encoder::GELF do + let(:encoder_args) { {} } + let(:encoder) { described_class.new(**encoder_args) } + + describe 'version field' do + it 'adds a fixed version' do + expect(encoder.encode({})).to include '"version":"1.1"' + end + end + + describe 'host field' do + it 'adds the current host by default' do + allow(Socket).to receive(:gethostname).and_return('foo') + expect(encoder.encode({})).to include '"host":"foo"' + end + + it 'uses the configured host field' do + encoder_args[:fields] = { host: 'host_field' } + + expect(Socket).not_to receive(:gethostname) + expect(encoder.encode('host_field' => 'foo')).to include '"host":"foo"' + expect(encoder.encode('host_field' => 'foo')).not_to include 'host_field' + end + + it 'adds the current host if the host field is missing' do + encoder_args[:fields] = { host: 'host_field' } + + allow(Socket).to receive(:gethostname).and_return('localhorst.example.com') + expect(encoder.encode({})).to include '"host":"localhorst.example.com"' + end + end + + describe 'timestamp field' do + it 'formats the event timestamp' do + event = { '@timestamp' => DateTime.new(2016, 10, 17, 16, 37, 0, '+03:00') } + expect(encoder.encode(event)).to include '"timestamp":1476711420.0' + expect(encoder.encode(event)).not_to include '@timestamp' + end + + it 'formats a DateTime' do + event = { '@timestamp' => DateTime.new(2016, 10, 17, 16, 37, 0, '+03:00') } + expect(encoder.encode(event)).to include '"timestamp":1476711420.0' + end + + it 'formats a Date' do + event = { '@timestamp' => Date.new(2016, 10, 17) } + expect(encoder.encode(event)).to include '"timestamp":1476655200.0' + end + + it 'formats a String' do + event = { '@timestamp' => '2016-10-17T16:37:00+03:00' } + expect(encoder.encode(event)).to include '"timestamp":1476711420.0' + end + + it 'formats an Integer' do + event = { '@timestamp' => 1476711420 } + expect(encoder.encode(event)).to include '"timestamp":1476711420.0' + end + + + it 'adds the current timestamp by default' do + allow(Time).to receive(:now) + .and_return Time.new(2017, 12, 17, 16, 37, 0, '+03:00') + expect(encoder.encode({})).to include '"timestamp":1513517820.0' + end + end + + describe 'level field' do + it 'uses the configured level field' do + encoder_args[:fields] = { level: 'level_field' } + + messages = [instance_double('Rackstash::Message', severity: 3)] + + expect(encoder.encode('message' => messages, 'level_field' => 1)) + .to include '"level":1' + expect(encoder.encode('message' => messages, 'level_field' => 1)) + .not_to include 'level_field' + end + + it 'sets the level to UNKNOWN without any messages' do + expect(encoder.encode({})).to include '"level":1' + end + + it 'extracts the highest severity from the messages to get the level' do + messages = [ + instance_double('Rackstash::Message', severity: 0), # DEBUG + instance_double('Rackstash::Message', severity: 3), # ERROR + instance_double('Rackstash::Message', severity: 1), # INFO + ] + + expect(encoder.encode('message' => messages)).to include '"level":4' + end + + it 'ignores invalid message severities' do + messages = [ + instance_double('Rackstash::Message', severity: 123), + instance_double('Rackstash::Message', severity: 5), + 'a string', + 32, + nil + ] + + expect(encoder.encode('message' => messages)).to include '"level":1' + expect(encoder.encode('message' => nil)).to include '"level":1' + expect(encoder.encode('message' => '')).to include '"level":1' + end + + context 'with default_severity' do + before do + encoder_args[:default_severity] = 1 + end + + it 'uses the custom default level' do + messages = [ + instance_double('Rackstash::Message', severity: 123), + instance_double('Rackstash::Message', severity: 5), + instance_double('Rackstash::Message', severity: -3) + ] + + expect(encoder.encode('message' => messages)).to include '"level":6' + expect(encoder.encode('message' => nil)).to include '"level":6' + expect(encoder.encode('message' => '')).to include '"level":6' + expect(encoder.encode('message' => [messages.last])).to include '"level":6' + expect(encoder.encode({})).to include '"level":6' + end + + it 'can set a higher level' do + messages = [ + instance_double('Rackstash::Message', severity: 4) + ] + + expect(encoder.encode('message' => messages)).to include '"level":3' + end + end + end + + describe 'short_message field' do + it 'adds the event message to the short_message field by default' do + expect(encoder.encode('message' => ["Hello\n", "World\n"])) + .to include '"short_message":"Hello\nWorld\n"' + end + + it 'uses the configured short_message field' do + encoder_args[:fields] = { short_message: 'gelf_message' } + + event = { + 'message' => ["Hello\n", "World\n"], + 'gelf_message' => 'Hello GELF' + } + + expect(encoder.encode(event)) + .to include('"short_message":"Hello GELF"') + .and include('"_message":"Hello\nWorld\n"') + end + + it 'sets an empty short_message if the configured field is missing' do + encoder_args[:fields] = { short_message: 'gelf_message' } + + expect(encoder.encode({})).to include('"short_message":""') + end + end + + describe 'full_message field' do + it 'does not include the field by default' do + expect(encoder.encode({})).not_to include 'full_message' + end + + it 'includes the field if configured and present' do + encoder_args[:fields] = { full_message: 'full' } + + expect(encoder.encode('full' => 'GELF MESSAGE')) + .to include '"full_message":"GELF MESSAGE"' + end + + it 'does not include the field if configured and NOT present' do + encoder_args[:fields] = { full_message: 'full' } + + expect(encoder.encode({})).not_to include 'full_message' + end + end + + describe 'additional fields' do + it 'adds additional simple fields' do + expect(encoder.encode( + 'str' => 'hello world', + 'int' => 123, + 'f' => 3.14, + 'date' => Date.new(2017, 3, 6), + 'time' => Time.new(2017, 2, 17, 16, 37, 0, '+03:00'), + 'datetime' => DateTime.new(2016, 10, 7, 16, 37, 0, '+03:00') + )) + .to include('"_str":"hello world"') + .and include('"_int":123') + .and include('"_f":3.14') + .and include('"_date":"2017-03-06"') + .and include('"_time":"2017-02-17T13:37:00.000000Z"') + .and include('"_datetime":"2016-10-07T13:37:00.000000Z"') + end + + it 'normalizes keys' do + expect(encoder.encode('with spaces' => 'value', 'Motörhead' => "band")) + .to include('"_with_spaces":"value"') + .and include('"_Mot_rhead":"band"') + end + + it "transforms the id key" do + expect(encoder.encode('id' => 123, 'nested' => {'_id' => 42})) + .to include('"__id":123') + .and include('"_nested._id":42') + end + + it 'adds nested hashes' do + event = { + 'nested' => { + 'str' => 'beep', + 'int' => 123, + 'inner' => { 'foo' => 'bar' } + } + } + + expect(encoder.encode(event)) + .to include('"_nested.str":"beep"') + .and include('"_nested.int":123') + .and include('"_nested.inner.foo":"bar"') + end + + it 'adds nested arrays' do + expect(encoder.encode('array' => ['foo', 'bar', [123, 42]])) + .to include('"_array.0":"foo"') + .and include('"_array.1":"bar"') + .and include('"_array.2.0":123') + .and include('"_array.2.1":42') + end + end +end diff --git a/spec/rackstash/encoder/helper/fields_map_spec.rb b/spec/rackstash/encoder/helper/fields_map_spec.rb new file mode 100644 index 0000000..1d3573b --- /dev/null +++ b/spec/rackstash/encoder/helper/fields_map_spec.rb @@ -0,0 +1,64 @@ +# frozen_string_literal: true +# +# Copyright 2017 - 2018 Holger Just +# +# This software may be modified and distributed under the terms +# of the MIT license. See the LICENSE.txt file for details. + +require 'spec_helper' + +require 'rackstash/encoder/helper/fields_map' + +RSpec.describe Rackstash::Encoder::Helper::FieldsMap do + let(:helper) { + helper = Object.new.extend(described_class) + described_class.private_instance_methods(false).each do |method| + helper.define_singleton_method(method) do |*args, &block| + super(*args, &block) + end + end + helper + } + let(:event) { + { + 'foo' => 'hello', + 'bar' => 'world' + } + } + + describe '#extract_field' do + context 'with defaults' do + let(:defaults) { { default: 'foo' } } + + it 'uses default fields' do + helper.set_fields_mapping({ something: 'beep' }, defaults) + expect(helper.extract_field(:default, event)).to eql 'hello' + end + + it 'can overwrite default fields' do + helper.set_fields_mapping({ default: 'bar' }, defaults) + expect(helper.extract_field(:default, event)).to eql 'world' + end + end + + it 'returns the field value if it exists' do + helper.set_fields_mapping({}, { field: 'foo' }) + expect(helper.extract_field(:field, event)).to eql 'hello' + end + + it 'returns nil if the field does not exist' do + helper.set_fields_mapping({}, { field: 'invalid' }) + expect(helper.extract_field(:field, event)).to eql nil + end + + it 'returns the result of the given block if the field does not exist' do + helper.set_fields_mapping({}, { field: 'invalid' }) + expect(helper.extract_field(:field, event) { 123 }).to eql 123 + end + + it 'yield the resolved field name' do + helper.set_fields_mapping({}, { field: 'field' }) + expect { |b| helper.extract_field(:field, event, &b) }.to yield_with_args('field') + end + end +end