From fe44186da4a380cb0a85e816ff4a4ba2916426ce Mon Sep 17 00:00:00 2001 From: Holger Just Date: Wed, 3 Jan 2018 20:13:50 +0100 Subject: [PATCH] Add filter to mask of IP addresses in event fields --- lib/rackstash.rb | 3 +- lib/rackstash/filter/anonymize_ip_mask.rb | 157 ++++++++++++++++++ .../filter/anonymize_ip_mask_spec.rb | 113 +++++++++++++ 3 files changed, 272 insertions(+), 1 deletion(-) create mode 100644 lib/rackstash/filter/anonymize_ip_mask.rb create mode 100644 spec/rackstash/filter/anonymize_ip_mask_spec.rb diff --git a/lib/rackstash.rb b/lib/rackstash.rb index bffdcac..8715425 100644 --- a/lib/rackstash.rb +++ b/lib/rackstash.rb @@ -1,6 +1,6 @@ # frozen_string_literal: true # -# Copyright 2017 Holger Just +# Copyright 2017 - 2018 Holger Just # # This software may be modified and distributed under the terms # of the MIT license. See the LICENSE.txt file for details. @@ -180,6 +180,7 @@ require 'rackstash/encoder/logstash' require 'rackstash/encoder/message' require 'rackstash/encoder/raw' +require 'rackstash/filter/anonymize_ip_mask' require 'rackstash/filter/clear_color' require 'rackstash/filter/default_fields' require 'rackstash/filter/default_tags' diff --git a/lib/rackstash/filter/anonymize_ip_mask.rb b/lib/rackstash/filter/anonymize_ip_mask.rb new file mode 100644 index 0000000..dc0917c --- /dev/null +++ b/lib/rackstash/filter/anonymize_ip_mask.rb @@ -0,0 +1,157 @@ +# frozen_string_literal: true +# +# Copyright 2018 Holger Just +# +# This software may be modified and distributed under the terms +# of the MIT license. See the LICENSE.txt file for details. + +require 'ipaddr' + +require 'rackstash/filter' + +module Rackstash + module Filter + # Anonymize found IP addresses by masking off a number of bits so that only + # the network of the address remains identifiable but the specific host + # remains anonymous. + # + # This is a very common approach to ensure a balance between direct + # identification of an IP address (e.g.
a client IP of a web request) and + # the desire to anonymize it a bit. With the network still present, it is + # possible to roughly identify the source of the request and perform + # analysis. Usually, it's no longer possible to identify a specific user + # though. + # + # You can define the number of bits that should be masked off at the end of + # the IP address. This is not the same as a subnet mask, in fact, it is the + # inverse. By default, we mask off 8 bits for IPv4 addresses (so that a `/24` + # network remains) and 80 bits for IPv6 addresses (so that a `/48` network + # remains). + # + # Note that IPv4-mapped IPv6 addresses as well as IPv4-compatible IPv6 + # addresses are masked off as IPv4 addresses since they actually (more or + # less) represent an IPv4 address. + # + # We are writing raw String representations of the anonymized IP address to + # the target field: + # + # @example + # Rackstash::Flow.new(STDOUT) do + # # Anonymize IP addresses + # filter :anonymize_ip_mask, {'source_ip' => 'source_ip'} + # end + class AnonymizeIPMask + # @param field_spec [Hash<#to_s => #to_s>] a `Hash` specifying which + # fields should be anonymized and where the result should be stored. The + # key describes the name of the existing source field and the value the + # name of the field where the anonymized result should be stored. + # @param ipv4_mask [#to_i] The number of bits which are masked off at + # the end of an IPv4 address, i.e. that many bits at the end of an IPv4 + # address are set to 0. Must be between 1 and 32. + # @param ipv6_mask [#to_i] The number of bits which are masked off at + # the end of an IPv6 address, i.e. that many bits at the end of an IPv6 + # address are set to 0. Must be between 1 and 128.
+ def initialize(field_spec, ipv4_mask: 8, ipv6_mask: 80) + @fields = {} + Hash(field_spec).each_pair do |key, value| + @fields[key.to_s] = value.to_s + end + + @ipv4_mask = Integer(ipv4_mask) + unless @ipv4_mask.between?(1, 32) + raise ArgumentError, 'ipv4_mask must be between 1 and 32 bits' + end + + @ipv6_mask = Integer(ipv6_mask) + unless @ipv6_mask.between?(1, 128) + raise ArgumentError, 'ipv6_mask must be between 1 and 128 bits' + end + end + + # Anonymize configured fields with IP addresses in the given `event` hash + # by masking off the defined number of bits at the end. The anonymized IP + # address will then be written to the target key in the event hash as a + # simple String representation of the IP address. + # + # If we can not parse the value in a source field as an IP address, we + # will not write anything to the target key in the event hash. + # + # @example + # filter = Rackstash::Filter::AnonymizeIPMask.new('source_ip' => 'anonymized_ip') + # + # filter.call('source_ip' => '10.42.42.123') + # # => {'source_ip' => '10.42.42.123', 'anonymized_ip' => '10.42.42.0'} + # + # filter.call('source_ip' => '2400:cb00:2048:1::6810:1460') + # # => {'source_ip' => '2400:cb00:2048:1::6810:1460', 'anonymized_ip' => '2400:cb00:2048::'} + # + # # We are not writing the new value if a source can not be found + # filter.call('another_ip' => '192.168.42.123') + # # => {'another_ip' => '192.168.42.123'} + # + # @param event [Hash] an event hash + # @return [Hash] the given `event` with the defined IP fields anonymized + def call(event) + @fields.each_pair do |source, target| + value = anonymize(event[source]) + event[target] = value unless value.nil? + end + event + end + + # Anonymize a single IP address or an array of IP addresses by masking off + # trailing bits. When giving a single value, we return the masked IP as a + # `String` or `nil` if the given value is not a valid IP address. When + # giving multiple values (i.e.
an `Array` of IP addresses), we try to + # anonymize each value separately. Only valid values will be included in + # the returned Array. + # + # @param value [#to_s, Array<#to_s>] the IP address(es) to anonymize + # @return [String, Array<String>, nil] The anonymized IP or `nil` if the + # given `value` was invalid. When giving an Array, we return an array of + # anonymized IPs. Only valid source values are included. + def anonymize(value) + case value + when Array + result = [] + value.each do |element| + anonymized = anonymize_value(element) + result << anonymized unless anonymized.nil? + end + result + when nil + nil + else + anonymize_value(value) + end + end + + private + + # Anonymize a single IP address by masking off its trailing bits + # @param value [#to_s] an IP address + # @return [String, nil] the anonymized IP address or `nil` if the given + # `value` was not a valid IP address + def anonymize_value(value) + begin + ip = IPAddr.new(value.to_s) + rescue ArgumentError + # IPAddr was not able to parse the value as an IP address + return nil + end + + if ip.ipv4? + masked_ip = ip.mask(32 - @ipv4_mask) + elsif ip.ipv4_compat? || ip.ipv4_mapped? + masked_ip = ip.mask(128 - @ipv4_mask) + elsif ip.ipv6? + masked_ip = ip.mask(128 - @ipv6_mask) + end + + masked_ip.to_s.force_encoding(Encoding::UTF_8) + end + end + + register AnonymizeIPMask, :anonymize_ip_mask + end +end diff --git a/spec/rackstash/filter/anonymize_ip_mask_spec.rb b/spec/rackstash/filter/anonymize_ip_mask_spec.rb new file mode 100644 index 0000000..3e58efc --- /dev/null +++ b/spec/rackstash/filter/anonymize_ip_mask_spec.rb @@ -0,0 +1,113 @@ +# frozen_string_literal: true +# +# Copyright 2018 Holger Just +# +# This software may be modified and distributed under the terms +# of the MIT license. See the LICENSE.txt file for details.
+ +require 'spec_helper' + +require 'rackstash/filter/anonymize_ip_mask' + +describe Rackstash::Filter::AnonymizeIPMask do + let(:event) { + { + 'ipv4' => '10.123.42.65', + 'ipv6' => '2400:cb00:2048:1::6810:1460', + 'ipv6_mapped' => '::FFFF:192.168.42.65', + 'ipv6_compat' => '::10.123.42.65', + 'invalid' => 'invalid', + 'array' => ['10.123.42.65', 123, 'foobar', '2400:cb00:2048:1::6810:1460'] + } + } + + let(:filter_spec) { {} } + let(:ipv4_mask) { 8 } + let(:ipv6_mask) { 80 } + let(:filter) { + described_class.new( + filter_spec, + ipv4_mask: ipv4_mask, + ipv6_mask: ipv6_mask + ) + } + + it 'masks IPv4 addresses' do + filter_spec['ipv4'] = 'anonymized' + filter.call(event) + + expect(event).to include( + 'ipv4' => '10.123.42.65', + 'anonymized' => '10.123.42.0' + ) + end + + it 'masks IPv6 addresses' do + filter_spec['ipv6'] = 'anonymized' + filter.call(event) + + expect(event).to include( + 'ipv6' => '2400:cb00:2048:1::6810:1460', + 'anonymized' => '2400:cb00:2048::' + ) + end + + it 'masks IPv4-mapped IPv6 addresses' do + filter_spec['ipv6_mapped'] = 'anonymized' + filter.call(event) + + expect(event).to include( + 'ipv6_mapped' => '::FFFF:192.168.42.65', + 'anonymized' => '::ffff:192.168.42.0' + ) + end + + it 'masks IPv4-compatible IPv6 addresses' do + filter_spec['ipv6_compat'] = 'anonymized' + filter.call(event) + + expect(event).to include( + 'ipv6_compat' => '::10.123.42.65', + 'anonymized' => '::10.123.42.0' + ) + end + + it 'retains invalid values' do + filter_spec['invalid'] = 'ignored' + filter.call(event) + + expect(event).to include 'invalid' => 'invalid' + expect(event).not_to include 'ignored' + end + + it 'ignores unknown values' do + filter_spec['unknown'] = 'ignored' + filter.call(event) + + expect(event).not_to include 'ignored' + end + + + it 'anonymizes arrays' do + filter_spec['array'] = 'anonymized' + filter.call(event) + + expect(event).to include 'anonymized' => ['10.123.42.0', '2400:cb00:2048::'] + end + + it 'fails with invalid
arguments' do + expect { described_class.new({}, ipv4_mask: 0) }.to raise_error(ArgumentError) + expect { described_class.new({}, ipv4_mask: -3) }.to raise_error(ArgumentError) + expect { described_class.new({}, ipv4_mask: 33) }.to raise_error(ArgumentError) + expect { described_class.new({}, ipv4_mask: '/24') }.to raise_error(ArgumentError) + expect { described_class.new({}, ipv4_mask: false) }.to raise_error(TypeError) + expect { described_class.new({}, ipv4_mask: nil) }.to raise_error(TypeError) + + expect { described_class.new({}, ipv6_mask: 0) }.to raise_error(ArgumentError) + expect { described_class.new({}, ipv6_mask: -3) }.to raise_error(ArgumentError) + expect { described_class.new({}, ipv6_mask: 129) }.to raise_error(ArgumentError) + expect { described_class.new({}, ipv6_mask: '/80') }.to raise_error(ArgumentError) + expect { described_class.new({}, ipv6_mask: false) }.to raise_error(TypeError) + expect { described_class.new({}, ipv6_mask: nil) }.to raise_error(TypeError) + end +end # describe Rackstash::Filter::AnonymizeIPMask