# Log files must be daily, and each file name must embed the date in year-month-day form
# (4-digit year, 2-digit month, 2-digit day); no other layout is currently supported.
log_name_pattern: 'sample_logs/counter_(yyyy-mm-dd).log'

# path_types holds regular expressions used to classify each page URL as either an investigation
# or a request, based on the specific URL structure of your system.
# Dataverse note: the URL these patterns are matched against does not include the query
# parameters, so a pattern such as dataset.xhtml\S+ will not match; \S* is used instead.
# The dot in ".xhtml" is escaped so it matches only a literal "." and not any character.
# Patterns are single-quoted so backslashes and regex metacharacters stay literal in YAML.
path_types:
  investigations:
    - '^.*/dataset\.xhtml\S*$'
    - '^.*/file\.xhtml\S*$'
    - '^.*/api/datasets\S*$'
    - '^.*/api/v1/datasets\S*$'
    ## Below historic regex for testing
    #- ^/api/datasets/[^\/]+$
    #- ^/api/versions/\d+$
    #- ^/stash/dataset/\S+$
    #- ^/stash/data_paper/\S+$
  requests:
    - '^.*/api/access/datafile\S+$'
    - '^.*/api/v1/access/datafile\S+$'
    ## Below historic regex for testing
    #- ^/api/datasets/[^\/]+/download$
    #- ^/api/versions/\d+/download$
    #- ^/api/downloads/\d+$
    #- ^/stash/downloads/download_resource/\d+$
    #- ^/stash/downloads/file_download/\d+$
    #- ^/stash/downloads/file_stream/\d+$
    #- ^/stash/downloads/async_request/\d+$
    #- ^/stash/share/\S+$

# robots_url and machines_url are URLs from which the script can download lists of regular
# expressions used to determine whether a user-agent is a robot or a machine.
# Each text file has one regular expression per line.
robots_url: 'https://raw.githubusercontent.com/CDLUC3/Make-Data-Count/master/user-agents/lists/robot.txt'
machines_url: 'https://raw.githubusercontent.com/CDLUC3/Make-Data-Count/master/user-agents/lists/machine.txt'

# Year and month (yyyy-mm) of the report you are creating.
# Quoted so the value is always read as a string.
# year_month: '2018-05'
year_month: '2019-01'

# Give the output path without a filename extension; the code appends the tsv or json
# extension for you.
# Output formats are currently either tsv or json. TSV is currently broken, and will remain so
# until someone accepts reports in that format.
output_file: '/dataverse/sushi_sample_logs'
output_format: 'json'

# Platform name that goes into your reports.
platform: 'Dataverse'

# Do not put your real API token here if this file is going to be committed. Instead, put it in
# a separate secrets.yaml in the same directory as this config, or set an environment variable
# when starting up in order to override the key.
# YAML key/values set in secrets.yaml override those from the main config.
hub_api_token: 'set_me_in_secrets'
# The test metrics URL is only for testing.
# hub_base_url: https://metrics.test.datacite.org
hub_base_url: 'https://api.test.datacite.org'
# Canonical lowercase boolean, so every YAML loader reads it as a boolean rather than a string.
upload_to_hub: false

# only use this to simulate running on a date besides today
# simulate_date: 2018-04-02

# Path to the MaxMind GeoLite2 country database file (.mmdb).
maxmind_geoip_country_path: 'maxmind_geoip/GeoLite2-Country.mmdb'
