1
0
mirror of https://github.com/meineerde/rackstash.git synced 2026-01-31 17:27:13 +00:00

Add filter to mask of IP addresses in event fields

This commit is contained in:
Holger Just 2018-01-03 20:13:50 +01:00
parent cf7587cde8
commit fe44186da4
3 changed files with 272 additions and 1 deletions

View File

@ -1,6 +1,6 @@
# frozen_string_literal: true
#
# Copyright 2017 Holger Just
# Copyright 2017 - 2018 Holger Just
#
# This software may be modified and distributed under the terms
# of the MIT license. See the LICENSE.txt file for details.
@ -180,6 +180,7 @@ require 'rackstash/encoder/logstash'
require 'rackstash/encoder/message'
require 'rackstash/encoder/raw'
require 'rackstash/filter/anonymize_ip_mask'
require 'rackstash/filter/clear_color'
require 'rackstash/filter/default_fields'
require 'rackstash/filter/default_tags'

View File

@ -0,0 +1,157 @@
# frozen_string_literal: true
#
# Copyright 2018 Holger Just
#
# This software may be modified and distributed under the terms
# of the MIT license. See the LICENSE.txt file for details.
require 'ipaddr'
require 'rackstash/filter'
module Rackstash
module Filter
# Anonymize found IP addresses by masking off a number of trailing bits so
# that only the network of the address remains identifiable but the specific
# host remains anonymous.
#
# This is a very common approach to ensure a balance between direct
# identification of an IP address (e.g. a client IP of a web request) and
# the desire to anonymize it a bit. With the network still present, it is
# possible to roughly identify the source of the request and perform
# analysis. Usually, it is no longer possible to identify a specific user
# though.
#
# You can define the number of bits that should be masked off at the end of
# the IP address. This is not the same as a subnet mask, in fact, it is the
# inverse. By default, we mask off 8 bits for IPv4 addresses (so that a `/24`
# network remains) and 80 bits for IPv6 addresses (so that a `/48` network
# remains).
#
# Note that IPv4-mapped IPv6 addresses as well as IPv4-compatible IPv6
# addresses are masked off as IPv4 addresses since they actually (more or
# less) represent an IPv4 address.
#
# We are writing raw String representations of the anonymized IP address to
# the target field:
#
# @example
# Rackstash::Flow.new(STDOUT) do
# # Anonymize IP addresses
# filter :anonymize_ip_mask, {'source_ip' => 'source_ip'}
# end
class AnonymizeIPMask
  # Set up the filter with the fields to anonymize and the number of trailing
  # bits to clear for each address family.
  #
  # @param field_spec [Hash<#to_s => #to_s>] maps the name of each existing
  #   source field (the key) to the name of the field which receives the
  #   anonymized result (the value). Keys and values are stored as Strings.
  # @param ipv4_mask [Object] the number of bits set to 0 at the end of an
  #   IPv4 address. The value is converted with `Integer()` and must be
  #   between 1 and 32.
  # @param ipv6_mask [Object] the number of bits set to 0 at the end of an
  #   IPv6 address. The value is converted with `Integer()` and must be
  #   between 1 and 128.
  # @raise [ArgumentError] if a mask is outside of its allowed range or is a
  #   String which does not represent an Integer
  # @raise [TypeError] if a mask can not be converted to an Integer at all,
  #   e.g. `nil` or `false`
  def initialize(field_spec, ipv4_mask: 8, ipv6_mask: 80)
    @fields = Hash(field_spec).each_with_object({}) do |(source, target), mapping|
      mapping[source.to_s] = target.to_s
    end

    @ipv4_mask = Integer(ipv4_mask)
    if @ipv4_mask < 1 || @ipv4_mask > 32
      raise ArgumentError, 'ipv4_mask must be between 1 and 32 bits'
    end

    @ipv6_mask = Integer(ipv6_mask)
    if @ipv6_mask < 1 || @ipv6_mask > 128
      raise ArgumentError, 'ipv6_mask must be between 1 and 128 bits'
    end
  end

  # Mask the IP addresses found in the configured source fields of the given
  # `event` hash and store the result under the respective target key as a
  # plain String (or an Array of Strings for Array sources).
  #
  # A target key is only written if the source field yielded a result. If
  # the source field is missing or does not hold a parsable IP address, the
  # event is left untouched for that field.
  #
  # @example
  #   filter = Rackstash::Filter::AnonymizeIPMask.new('source_ip' => 'anonymized_ip')
  #
  #   filter.call('source_ip' => '10.42.42.123')
  #   # => {'source_ip' => '10.42.42.123', 'anonymized_ip' => '10.42.42.0'}
  #
  #   filter.call('source_ip' => '2400:cb00:2048:1::6810:1460')
  #   # => {'source_ip' => '2400:cb00:2048:1::6810:1460', 'anonymized_ip' => '2400:cb00:2048::'}
  #
  #   # Nothing is written if the source field can not be found
  #   filter.call('another_ip' => '192.168.42.123')
  #   # => {'another_ip' => '192.168.42.123'}
  #
  # @param event [Hash] an event hash
  # @return [Hash] the given `event` with the defined IP fields anonymized
  def call(event)
    @fields.each do |source, target|
      anonymized = anonymize(event[source])
      next if anonymized.nil?

      event[target] = anonymized
    end

    event
  end

  # Mask the trailing bits of a single IP address or of each IP address in
  # an Array.
  #
  # For a single value, the result is the masked IP as a `String`, or `nil`
  # if the value is not a valid IP address. For an `Array`, each element is
  # handled on its own and only the valid ones show up in the result.
  #
  # @param value [#to_s, Array<#to_s>] the IP address(es) to anonymize
  # @return [String, Array<String>, nil] the anonymized IP, or `nil` if the
  #   given `value` was `nil` or invalid. For an Array, an Array with the
  #   anonymized form of all valid elements.
  def anonymize(value)
    return nil if value.nil?
    return anonymize_value(value) unless value.is_a?(Array)

    value.map { |element| anonymize_value(element) }.compact
  end

  private

  # Anonymize a single IP address.
  #
  # IPv4-compatible and IPv4-mapped IPv6 addresses carry an IPv4 address in
  # their last 32 bits, so they are masked with the IPv4 bit count while
  # keeping their 128 bit representation.
  #
  # @param value [#to_s] an IP address
  # @return [String, nil] the anonymized IP address as an UTF-8 String or
  #   `nil` if the given `value` was not a valid IP address
  def anonymize_value(value)
    ip = parse_ip(value)
    return nil if ip.nil?

    masked =
      if ip.ipv4?
        ip.mask(32 - @ipv4_mask)
      elsif ip.ipv4_compat? || ip.ipv4_mapped?
        ip.mask(128 - @ipv4_mask)
      elsif ip.ipv6?
        ip.mask(128 - @ipv6_mask)
      end

    masked.to_s.force_encoding(Encoding::UTF_8)
  end

  # Parse the String representation of `value` as an IP address.
  #
  # @param value [#to_s] a potential IP address
  # @return [IPAddr, nil] the parsed address or `nil` if `IPAddr` rejected
  #   the value with an `ArgumentError`
  def parse_ip(value)
    IPAddr.new(value.to_s)
  rescue ArgumentError
    nil
  end
end
register AnonymizeIPMask, :anonymize_ip_mask
end
end

View File

@ -0,0 +1,113 @@
# frozen_string_literal: true
#
# Copyright 2018 Holger Just
#
# This software may be modified and distributed under the terms
# of the MIT license. See the LICENSE.txt file for details.
require 'spec_helper'
require 'rackstash/filter/anonymize_ip_mask'
describe Rackstash::Filter::AnonymizeIPMask do
  # Sample event with one field per kind of value the filter has to handle.
  let(:event) do
    {
      'ipv4' => '10.123.42.65',
      'ipv6' => '2400:cb00:2048:1::6810:1460',
      'ipv6_mapped' => '::FFFF:192.168.42.65',
      'ipv6_compat' => '::10.123.42.65',
      'invalid' => 'invalid',
      'array' => ['10.123.42.65', 123, 'foobar', '2400:cb00:2048:1::6810:1460']
    }
  end

  # The field mapping starts out empty. Each example adds the mapping it
  # needs before the filter is referenced for the first time.
  let(:filter_spec) { {} }
  let(:ipv4_mask) { 8 }
  let(:ipv6_mask) { 80 }
  let(:filter) do
    described_class.new(filter_spec, ipv4_mask: ipv4_mask, ipv6_mask: ipv6_mask)
  end

  # Register a `source` => `target` mapping and run the filter once against
  # the sample event.
  def run_filter(source, target)
    filter_spec[source] = target
    filter.call(event)
  end

  it 'masks IPv4 addresses' do
    run_filter('ipv4', 'anonymized')

    expect(event).to include(
      'ipv4' => '10.123.42.65',
      'anonymized' => '10.123.42.0'
    )
  end

  it 'masks IPv6 addresses' do
    run_filter('ipv6', 'anonymized')

    expect(event).to include(
      'ipv6' => '2400:cb00:2048:1::6810:1460',
      'anonymized' => '2400:cb00:2048::'
    )
  end

  it 'masks IPv4-mapped IPv6 addresses' do
    run_filter('ipv6_mapped', 'anonymized')

    expect(event).to include(
      'ipv6_mapped' => '::FFFF:192.168.42.65',
      'anonymized' => '::ffff:192.168.42.0'
    )
  end

  it 'masks IPv4-compatible IPv6 addresses' do
    run_filter('ipv6_compat', 'anonymized')

    expect(event).to include(
      'ipv6_compat' => '::10.123.42.65',
      'anonymized' => '::10.123.42.0'
    )
  end

  it 'retains invalid values' do
    run_filter('invalid', 'ignored')

    expect(event).to include 'invalid' => 'invalid'
    expect(event).not_to include 'ignored'
  end

  it 'ignores unknown values' do
    run_filter('unknown', 'ignored')

    expect(event).not_to include 'ignored'
  end

  it 'anonymizes arrays' do
    run_filter('array', 'anonymized')

    expect(event).to include 'anonymized' => ['10.123.42.0', '2400:cb00:2048::']
  end

  it 'fails with invalid arguments' do
    # Values rejected with an ArgumentError, per option: out of range numbers
    # and Strings which are not plain Integers.
    out_of_range = {
      ipv4_mask: [0, -3, 33, '/24'],
      ipv6_mask: [0, -3, 129, '/80']
    }

    out_of_range.each do |option, rejected|
      rejected.each do |bits|
        expect { described_class.new({}, **{ option => bits }) }.to raise_error(ArgumentError)
      end

      # Values which can not be converted to an Integer at all
      [false, nil].each do |bits|
        expect { described_class.new({}, **{ option => bits }) }.to raise_error(TypeError)
      end
    end
  end
end