From 3b5ff3a4dca8b624d2c4dfe43f89eb6f17e31b70 Mon Sep 17 00:00:00 2001 From: Holger Just Date: Thu, 19 Apr 2018 22:37:57 +0200 Subject: [PATCH] Normalize fields more consistently in encoders --- lib/rackstash/encoder/helper/message.rb | 47 ++++++++++++------- lib/rackstash/encoder/helper/timestamp.rb | 29 ++++++++---- lib/rackstash/encoder/json.rb | 2 +- lib/rackstash/encoder/message.rb | 30 ++++++++---- spec/rackstash/encoder/hash_spec.rb | 2 +- spec/rackstash/encoder/helper/message_spec.rb | 15 +++--- .../encoder/helper/timestamp_spec.rb | 38 +++++++++++++-- 7 files changed, 116 insertions(+), 47 deletions(-) diff --git a/lib/rackstash/encoder/helper/message.rb b/lib/rackstash/encoder/helper/message.rb index 3be19df..a1df14d 100644 --- a/lib/rackstash/encoder/helper/message.rb +++ b/lib/rackstash/encoder/helper/message.rb @@ -15,29 +15,42 @@ module Rackstash # Normalize the `"message"` field of the given log event Hash. # + # @param event [Hash] a log event Hash + # @param field [String] the name of the field to normalize in the + # `event` + # @param force [Boolean] set to `true` to always set the `field` to a + # normalized value, even if it was empty or missing before + # @return [Hash] the given event with the normalized message + def normalize_message(event, field = FIELD_MESSAGE, force: false) #:doc: + message = event[field] + return event if message.nil? && !force + + event[field] = normalized_message(message) + event + end + + # Normalize various possible values in the `"message"` field of an event + # Hash into a single String. + # # While the filters still had access to the array of {Message} objects # for filtering, we now concatenate the raw message objects as a single # string to get the final message which is set on the `event["message"]` # key. # - # We expect that the single messages already contain trailing newline - # characters is deemed useful. These are usually added by the formatter - # of the frontend {Logger}. + # Usually, the individual messages already contain trailing newline + # characters. By default, these are added by the formatter of the + # {Logger} when the message is originally logged. As such, we + # concatenate all messages without an added separators. # - # @param event [Hash] a log event Hash - # @return [Hash] the given event with the `"message"` key set as a - # single string. - def normalize_message(event) #:doc: - event[FIELD_MESSAGE] = - case event[FIELD_MESSAGE] - when Array - event[FIELD_MESSAGE].map!(&:to_s).join - when nil - '' - else - event[FIELD_MESSAGE].to_s - end - event + # @param message [Object] the message(s) to normalize + # @return [String] the normalized event message as a single String + def normalized_message(message) #:doc: + case message + when Array + message.map(&:to_s).join + else + message.to_s + end end end end diff --git a/lib/rackstash/encoder/helper/timestamp.rb b/lib/rackstash/encoder/helper/timestamp.rb index ecd30d5..57222c8 100644 --- a/lib/rackstash/encoder/helper/timestamp.rb +++ b/lib/rackstash/encoder/helper/timestamp.rb @@ -18,7 +18,7 @@ module Rackstash private # Normalize the `"@timestamp"` field of the given log event Hash. - # Before any filters, only the `"@timestamp"` fueld contains a `Time` + # Before any filters, only the `"@timestamp"` field contains a `Time` # object denoting the timestamp of the log event. To represent this # timestamp in logs, it is formatted as an ISO 8601 string. The # timestamp will always be changed into UTC. @@ -26,19 +26,32 @@ module Rackstash # @param event [Hash] a log event Hash # @param field [String] the name of the timestamp field in the event # hash. By default, we use the `"@timestamp"` field. + # @param force [Bool] set to `true` to use the current time if the + # existing timestamp could not be interpreted as a timestamp # @return [Hash] the given event with the `field` key set as an ISO 8601 # formatted time string. - def normalize_timestamp(event, field = FIELD_TIMESTAMP) #:doc: - time = event[field] + def normalize_timestamp(event, field = FIELD_TIMESTAMP, force: false) #:doc: + time = normalized_time(event[field]) + time ||= Time.now.utc if force - if time.is_a?(Time) || time.is_a?(DateTime) - time = time.to_time - time = time.getutc unless time.utc? - event[field] = time.iso8601(ISO8601_PRECISION).freeze - end + event[field] = time.iso8601(ISO8601_PRECISION) if time event end + + # @param time [Time, DateTime, Date, Integer, Float] + # @return [Time, nil] a `Time` object on UTC timezone if the given + # `time` could be normalized as such or `nil` if the value does not + # describe a timestamp + def normalized_time(time) #:doc: + case time + when ::Time, ::DateTime + time = time.to_time + time.utc? ? time : time.getutc + when ::Date + ::Time.new(time.year, time.month, time.day, 0, 0, 0, 0).utc + end + end end end end diff --git a/lib/rackstash/encoder/json.rb b/lib/rackstash/encoder/json.rb index 78f38d9..a21ee54 100644 --- a/lib/rackstash/encoder/json.rb +++ b/lib/rackstash/encoder/json.rb @@ -28,7 +28,7 @@ module Rackstash # @param event [Hash] a log event as produced by the {Flow} # @return [String] the event as a single-line JSON string def encode(event) - normalize_message(event) unless event[FIELD_MESSAGE].nil? + normalize_message(event) normalize_timestamp(event) ::JSON.dump(event) diff --git a/lib/rackstash/encoder/message.rb b/lib/rackstash/encoder/message.rb index dfadc7a..0aec3d9 100644 --- a/lib/rackstash/encoder/message.rb +++ b/lib/rackstash/encoder/message.rb @@ -5,6 +5,9 @@ # This software may be modified and distributed under the terms # of the MIT license. See the LICENSE.txt file for details. +require 'date' +require 'time' + require 'rackstash/encoder' require 'rackstash/encoder/helper/message' @@ -44,14 +47,12 @@ module Rackstash # @param event [Hash] a log event as produced by the {Flow} # @return [String] the formatted message of the event def encode(event) - normalize_message(event) message = event[FIELD_MESSAGE] + return EMPTY_STRING if message.nil? + message = normalized_message(message) unless message.empty? - tags = @tagged_fields.map { |key| - normalize_timestamp(event, key) - format_tag event[key] - }.compact.join + tags = @tagged_fields.map { |key| format_tag event[key] }.join message = message.gsub(/^/) { tags } unless tags.empty? end @@ -61,13 +62,24 @@ module Rackstash private def format_tag(value) + return EMPTY_STRING if value.nil? + + "[#{format_tag_value(value)}] " + end + + def format_tag_value(value) case value - when nil - nil when ::Array - "[#{value.map(&:to_s).join(',')}] " + value.map { |v| format_tag_value(v) }.join(',') + when ::Hash + value.map { |k, v| "#{k.to_s}:#{format_tag_value(v)}" }.join(',') + when ::Time, ::DateTime + value = value.to_time.getutc + value.iso8601(ISO8601_PRECISION) + when ::Date + value.iso8601(ISO8601_PRECISION) else - "[#{value}] " + value.to_s end end end diff --git a/spec/rackstash/encoder/hash_spec.rb b/spec/rackstash/encoder/hash_spec.rb index c89da42..6410419 100644 --- a/spec/rackstash/encoder/hash_spec.rb +++ b/spec/rackstash/encoder/hash_spec.rb @@ -29,7 +29,7 @@ RSpec.describe Rackstash::Encoder::Hash do it 'passes the normalized event hash through' do event = { 'foo' => 'bar', 'baz' => :boing } expect(encoder.encode(event)) - .to eql 'foo' => 'bar', 'baz' => :boing, 'message' => '' + .to eql 'foo' => 'bar', 'baz' => :boing end end end diff --git a/spec/rackstash/encoder/helper/message_spec.rb b/spec/rackstash/encoder/helper/message_spec.rb index 14900ec..b8588de 100644 --- a/spec/rackstash/encoder/helper/message_spec.rb +++ b/spec/rackstash/encoder/helper/message_spec.rb @@ -23,21 +23,20 @@ RSpec.describe Rackstash::Encoder::Helper::Message do describe '#normalize_message' do it 'concatenates the message array' do - event['message'] = ["a\n", "b\n"] + event['message'] = ["a\n", "b\n", 42] - expect(helper.normalize_message(event)).to eql 'message' => "a\nb\n" + expect(helper.normalize_message(event)).to eql 'message' => "a\nb\n42" end - it 'sets message to an empty string if not present' do + it 'does not set a missing message' do + expect(helper.normalize_message(event)).to eql Hash.new + event['message'] = nil - expect(helper.normalize_message(event)).to eql 'message' => '' + expect(helper.normalize_message(event)).to eql 'message' => nil end it 'enforces to_s on other messages' do - foo = String.new('foo') - event['message'] = foo - - expect(foo).to receive(:to_s).and_call_original + event['message'] = :foo expect(helper.normalize_message(event)).to eql 'message' => 'foo' end end diff --git a/spec/rackstash/encoder/helper/timestamp_spec.rb b/spec/rackstash/encoder/helper/timestamp_spec.rb index c87feb2..538eadb 100644 --- a/spec/rackstash/encoder/helper/timestamp_spec.rb +++ b/spec/rackstash/encoder/helper/timestamp_spec.rb @@ -34,7 +34,13 @@ RSpec.describe Rackstash::Encoder::Helper::Timestamp do .to eql '2016-10-17T10:37:00.000000Z' end - it 'ignores an unset value' do + it 'formats a Date object' do + event['@timestamp'] = Date.new(2016, 10, 17) + expect(helper.normalize_timestamp(event).fetch('@timestamp')) + .to eql '2016-10-17T00:00:00.000000Z' + end + + it 'ignores an unset value by default' do expect(helper.normalize_timestamp(event)).not_to have_key '@timestamp' end @@ -45,8 +51,11 @@ RSpec.describe Rackstash::Encoder::Helper::Timestamp do event['@timestamp'] = nil expect(helper.normalize_timestamp(event).fetch('@timestamp')).to eql nil - event['@timestamp'] = 42 - expect(helper.normalize_timestamp(event).fetch('@timestamp')).to eql 42 + event['@timestamp'] = 123 + expect(helper.normalize_timestamp(event).fetch('@timestamp')).to eql 123 + + event['@timestamp'] = 3.14 + expect(helper.normalize_timestamp(event).fetch('@timestamp')).to eql 3.14 end it 'uses the given field name' do @@ -58,5 +67,28 @@ RSpec.describe Rackstash::Encoder::Helper::Timestamp do 'custom' => '2016-10-17T13:42:00.000000Z' ) end + + context 'with force: true' do + let(:time) { Time.parse('2016-10-17 13:37:00 +03:00') } + + before do + allow(Time).to receive(:now).and_return(time) + end + + it 'initializes an unset value' do + expect(helper.normalize_timestamp(event, force: true).fetch('@timestamp')) + .to eql '2016-10-17T10:37:00.000000Z' + end + + it 'uses the current time for unknown values' do + event['@timestamp'] = 'string' + expect(helper.normalize_timestamp(event, force: true).fetch('@timestamp')) + .to eql '2016-10-17T10:37:00.000000Z' + + event['@timestamp'] = :symbol + expect(helper.normalize_timestamp(event, force: true).fetch('@timestamp')) + .to eql '2016-10-17T10:37:00.000000Z' + end + end end end