# frozen_string_literal: true
# Copyright 2017 Holger Just
#
# This software may be modified and distributed under the terms
# of the MIT license. See the LICENSE.txt file for details.

module Rackstash
  module Filters
    # The TruncateMessage filter can be used to restrict the size of the
    # emitted message. By selectively deleting parts until the message size
    # fits into the defined limit, you can ensure that log events are properly
    # handled by downstream systems.
    #
    # This file lives at `lib/rackstash/filters/truncate_message.rb` and is
    # loaded from `lib/rackstash/filters.rb` with
    # `require 'rackstash/filters/truncate_message'`.
    #
    # The "size" of a message is whatever its `#size` method reports; the
    # overall size is the sum over all messages of the event.
    #
    # We are performing the following steps until the overall size of all
    # messages in the event is at or below the given maximum size:
    #
    # * Pass each message to the first selector, one after another. Each message
    #   for which the selector returns `false` or `nil` is deleted. Repeat this
    #   for each given selector until either the overall message size is at or
    #   below the defined `max_size`, there is at most one message left, or
    #   there are no further selectors.
    # * If the overall message size is still above `max_size`, we start to
    #   delete messages at the `cut` location until we have either achieved
    #   the desired size limit or we have only one message left, regardless of
    #   its size. With `cut: :top` we start to delete messages first at the
    #   beginning of the message list, with `cut: :bottom` (the default) with
    #   the very last message and with `cut: :middle` we are deleting from the
    #   middle of the message list preserving the messages at the beginning and
    #   the end.
    #
    # Note that in any case, we are only ever deleting whole messages (which
    # usually but not necessarily amount to whole lines). We are not splitting
    # messages.
    #
    # Also note that the "keep at least one message" guarantee only applies to
    # the cut step: a selector which rejects every message it is given leaves
    # the event with an empty message list.
    #
    # @example
    #   Rackstash::Flow.new(STDERR) do
    #     # Truncate the message to at most 1 MByte.
    #     # We try the following steps to cut a too large message down:
    #     # * We select all messages with at least INFO level, removing debug
    #     #   messages.
    #     # * If it's still too large, we also remove INFO messages, keeping
    #     #   only messages with a WARN severity or above
    #     # * If it's still too large, we remove log lines from the middle of
    #     #   the messages until we reach the desired size.
    #     filter Rackstash::Filters::TruncateMessage.new(
    #       1_000_000,
    #       selectors: [
    #         ->(message) { message.severity >= Rackstash::INFO },
    #         ->(message) { message.severity >= Rackstash::WARN }
    #       ],
    #       cut: :middle
    #     )
    #   end
    class TruncateMessage
      # @param max_size [Integer] The maximum desired combined size of all the
      #   messages in an event, measured with each message's `#size` (i.e.
      #   the number of characters for plain strings). The value is coerced
      #   with `Integer()`.
      # @param selectors [Array<#call>] An optional list of message filters
      #   (e.g. `Proc` objects) which accept a single message. When returning
      #   `nil` or `false`, the message is rejected. The list is copied, so
      #   later changes to the passed array do not affect the filter.
      # @param cut [Symbol] where to start removing messages if the message list
      #   is still too large after all selectors were applied. One of `:top`,
      #   `:middle`, or `:bottom`.
      # @raise [ArgumentError] if `cut` is not one of the allowed symbols
      # @raise [ArgumentError, TypeError] if `max_size` can not be converted
      #   by `Integer()`
      def initialize(max_size, selectors: [], cut: :bottom)
        @max_size = Integer(max_size)
        # `Array()` hands back the very same object when given an Array. Keep
        # a private, frozen copy so that the caller mutating its array later on
        # can not silently change the behavior of this filter.
        @selectors = Array(selectors).dup.freeze

        unless %i[top middle bottom].include?(cut)
          raise ArgumentError, 'cut must be one of :top, :middle, :bottom'
        end
        @cut = cut
      end

      # Remove messages if the overall size of all the messages in the given
      # event (the sum of each message's `#size`) is larger than the desired
      # `max_size`.
      #
      # The message list stored in the event is modified in place. Events
      # whose message field is not an `Array` are returned untouched.
      #
      # @param event [Hash] an event hash
      # @return [Hash] the given `event` with some messages potentially
      #   removed
      def call(event)
        messages = event[FIELD_MESSAGE]
        return event unless Array === messages

        @selectors.each do |selector|
          # Check the cheap element count first; summing the sizes is O(n).
          return event if messages.size <= 1 || overall_size_of(messages) <= @max_size
          messages.select! { |message| selector.call(message) }
        end
        return event if messages.size <= 1

        # Keep a running total instead of re-summing after every removal.
        remaining_size = overall_size_of(messages)
        until remaining_size <= @max_size || messages.size <= 1
          remaining_size -= remove_at_cut(messages).size
        end

        event
      end

      private

      # Delete a single message from the configured cut location.
      #
      # @param messages [Array] the message list, modified in place
      # @return [Object] the removed message
      def remove_at_cut(messages)
        case @cut
        when :top
          messages.shift
        when :middle
          messages.delete_at(messages.size / 2)
        when :bottom
          messages.pop
        end
      end

      # @param messages [Array<#size>] a list of messages
      # @return [Integer] the sum of the `#size` of all given messages
      def overall_size_of(messages)
        messages.inject(0) { |sum, msg| sum + msg.size }
      end
    end
  end
end

require 'spec_helper'

require 'rackstash/filters/truncate_message'

# Specs for Rackstash::Filters::TruncateMessage.
#
# The three sample messages have a combined size of 47 characters, which is
# above the configured `max_size` of 30. Each example thus forces the filter to
# drop messages, either through selectors or through the `cut` location.
describe Rackstash::Filters::TruncateMessage do
  let(:max_size) { 30 }
  let(:selectors) { [] }
  let(:cut) { :bottom }
  let(:filter) { described_class.new(max_size, selectors: selectors, cut: cut) }

  let(:messages) { ['some long message', 'sweet middle text', 'final message'] }
  let(:event) { { 'message' => messages } }

  describe '#initialize' do
    it 'verifies that a valid cut value is given' do
      # Only the symbols :top, :middle and :bottom are accepted.
      ['foo', :foo, false, nil].each do |invalid_cut|
        expect { described_class.new(42, cut: invalid_cut) }.to raise_error(ArgumentError)
      end
    end
  end

  describe '#call' do
    context 'with selectors' do
      let(:first_selector) { instance_double('Proc') }
      let(:second_selector) { instance_double('Proc') }
      let(:selectors) { [first_selector, second_selector] }

      it 'calls all selectors' do
        # Selectors keeping every message never reach the size goal, so each
        # one sees all three messages.
        expect(first_selector).to receive(:call).exactly(3).times.and_return(true)
        expect(second_selector).to receive(:call).exactly(3).times.and_return(true)

        filter.call(event)
      end

      it 'stops on goal' do
        # The first selector rejects everything; the second one must not run.
        expect(first_selector).to receive(:call).exactly(3).times.and_return(false)
        expect(second_selector).not_to receive(:call)

        filter.call(event)
        expect(messages).to be_empty
      end
    end

    context 'with cut: :top' do
      let(:cut) { :top }

      it 'removes the messages at the beginning' do
        filter.call(event)
        expect(messages).to eql(['sweet middle text', 'final message'])
      end
    end

    context 'with cut: :middle' do
      let(:cut) { :middle }

      it 'removes the messages in the middle' do
        filter.call(event)
        expect(messages).to eql(['some long message', 'final message'])
      end
    end

    context 'with cut: :bottom' do
      let(:cut) { :bottom }

      it 'removes the messages at the end' do
        filter.call(event)
        expect(messages).to eql(['some long message'])
      end
    end
  end
end