#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Console script to collect statistics about definitions."""
import collections
import os
import sys
import time
from artifacts import definitions
from artifacts import reader
class ArtifactStatistics:
    """Generate and print statistics about artifact definitions.

    The counters are (re)computed by BuildStats() and rendered as MarkDown
    by the Print*Table() methods; PrintStats() does both in one call.
    """

    def __init__(self):
        """Initializes artifact statistics."""
        super().__init__()
        # Number of artifact definitions per supported operating system.
        self._os_counts = collections.Counter()
        # Number of paths across all file and directory sources.
        self._path_count = 0
        # Number of keys and key-value pairs across all Windows Registry
        # sources.
        self._reg_key_count = 0
        # Number of sources per source type indicator.
        self._source_type_counts = collections.Counter()
        # Number of sources processed, reported by PrintSummaryTable().
        self._total_count = 0

    def _PrintDictAsTable(self, title, src_dict):
        """Prints a MarkDown table of counts, sorted by identifier.

        Args:
          title (str): title of the table.
          src_dict (dict[str, int]): number of occurrences per identifier.
        """
        print(f'### {title:s}')
        print('')
        print('Identifier | Number')
        print('--- | ---')
        for key, value in sorted(src_dict.items()):
            print(f'{key:s} | {value:d}')
        print('')

    def PrintOSTable(self):
        """Prints a table of artifact definitions by operating system."""
        self._PrintDictAsTable('Operating systems', self._os_counts)

    def PrintSourceTypeTable(self):
        """Prints a table of artifact definitions by source type."""
        self._PrintDictAsTable(
            'Artifact definition source types', self._source_type_counts)

    def PrintSummaryTable(self):
        """Prints a summary table, dated with the current local date."""
        date_time_string = time.strftime('%Y-%m-%d')
        # The continuation lines are part of the printed text and therefore
        # must stay at column 0.
        print(f"""Status of the repository as of {date_time_string:s}
Description | Number
--- | ---
Number of artifact definitions: | {self._total_count:d}
Number of file paths: | {self._path_count:d}
Number of Windows Registry key paths: | {self._reg_key_count:d}
""")

    def BuildStats(self):
        """Builds the statistics.

        Resets all counters and recomputes them from the artifact definitions
        read from the "artifacts/data" directory, which is resolved relative
        to the current working directory.
        """
        artifact_reader = reader.YamlArtifactsReader()

        # Start from scratch so that repeated calls do not accumulate.
        self._os_counts = collections.Counter()
        self._path_count = 0
        self._reg_key_count = 0
        self._source_type_counts = collections.Counter()
        self._total_count = 0

        data_files_path = os.path.join('artifacts', 'data')
        for artifact_definition in artifact_reader.ReadDirectory(
                data_files_path):
            sources_supported_os = set()
            for source in artifact_definition.sources:
                # NOTE(review): this is incremented once per source, while
                # PrintSummaryTable() labels the value "Number of artifact
                # definitions" - confirm which of the two is intended.
                self._total_count += 1

                source_type = source.type_indicator
                self._source_type_counts[source_type] += 1

                if source_type == (
                        definitions.TYPE_INDICATOR_WINDOWS_REGISTRY_KEY):
                    self._reg_key_count += len(source.keys)
                elif source_type == (
                        definitions.TYPE_INDICATOR_WINDOWS_REGISTRY_VALUE):
                    self._reg_key_count += len(source.key_value_pairs)
                elif source_type in (
                        definitions.TYPE_INDICATOR_FILE,
                        definitions.TYPE_INDICATOR_DIRECTORY):
                    self._path_count += len(source.paths)

                sources_supported_os.update(source.supported_os)

            # Fallback to the supported_os defined at definition level if none
            # of the sources specified supported operating systems.
            if not sources_supported_os:
                sources_supported_os = set(artifact_definition.supported_os)

            # Each operating system is counted at most once per definition
            # since the values were collected in a set.
            self._os_counts.update(sources_supported_os)

    def PrintStats(self):
        """Build stats and print in MarkDown format."""
        data_directory_url = (
            'https://github.com/ForensicArtifacts/artifacts/tree/main/artifacts/'
            'data')
        style_guide_url = (
            'https://artifacts.readthedocs.io/en/latest/sources/'
            'Format-specification.html')

        # The continuation lines are part of the printed text and therefore
        # must stay at column 0.
        print(f"""## Statistics
The artifact definitions can be found in the
[artifacts/data directory]({data_directory_url:s}) and the format is described
in detail in the [Style Guide]({style_guide_url:s}).
""")

        self.BuildStats()
        self.PrintSummaryTable()
        self.PrintSourceTypeTable()
        self.PrintOSTable()
def Main():
    """Entry point of console script to collect statistics about definitions.

    Builds the artifact definition statistics and prints them to stdout in
    MarkDown format.

    Returns:
      int: exit code that is provided to sys.exit().
    """
    stats_builder = ArtifactStatistics()
    stats_builder.PrintStats()
    return 0
# Only run the tool when executed as a script, not when imported.
if __name__ == '__main__':
    sys.exit(Main())