# Log files must be written daily, and each file name must carry the date in
# year-month-day form with a 4-digit year and 2-digit month and day; no other
# naming scheme is currently supported. Example of a matching file:
# /usr/local/payara7/glassfish/domains/domain1/logs/mdc/counter_2019-01-11.log
#log_name_pattern: sample_logs/counter_(yyyy-mm-dd).log
log_name_pattern: '/usr/local/payara7/glassfish/domains/domain1/logs/mdc/counter_(yyyy-mm-dd).log'

# path_types holds regular expressions that classify a page URL as either an
# investigation or a request, based on the URL structure of your system.
# Dataverse note: the URL these are matched against does not include the query
# parameters, so a pattern such as dataset.xhtml\S+ will not match.
path_types:
  investigations:
    # The "." before "xhtml" is escaped so that it only matches a literal dot
    # instead of any character.
    - ^.*/dataset\.xhtml\S*$
    - ^.*/file\.xhtml\S*$
    - ^.*/api/datasets\S*$
    - ^.*/api/v1/datasets\S*$
    ## Below historic regex for testing
    #- ^/api/datasets/[^\/]+$
    #- ^/api/versions/\d+$
    #- ^/stash/dataset/\S+$
    #- ^/stash/data_paper/\S+$
  requests:
    - ^.*/api/access/datafile\S+$
    - ^.*/api/v1/access/datafile\S+$
    ## Below historic regex for testing
    #- ^/api/datasets/[^\/]+/download$
    #- ^/api/versions/\d+/download$
    #- ^/api/downloads/\d+$
    #- ^/stash/downloads/download_resource/\d+$
    #- ^/stash/downloads/file_download/\d+$
    #- ^/stash/downloads/file_stream/\d+$
    #- ^/stash/downloads/async_request/\d+$
    #- ^/stash/share/\S+$

# robots_url and machines_url point to text files that the script downloads to
# get a list of regular expressions for deciding whether a user-agent is a
# robot or a machine. Each text file has one regular expression per line.
#robots_url: https://raw.githubusercontent.com/CDLUC3/Make-Data-Count/master/user-agents/lists/robot.txt
#machines_url: https://raw.githubusercontent.com/CDLUC3/Make-Data-Count/master/user-agents/lists/machine.txt
robots_url: 'https://raw.githubusercontent.com/IQSS/counter-processor/refs/heads/goto-gdcc/user-agents/lists/robots.txt'
machines_url: 'https://raw.githubusercontent.com/IQSS/counter-processor/refs/heads/goto-gdcc/user-agents/lists/machine.txt'

# The year and month (yyyy-mm) of the report you are creating.
# Quoted so that the date-like value is always read as a string.
year_month: '2019-01'

# Leave the filename extension off output_file; the code will tack on the tsv
# or json extension for you.
# Output formats are currently either tsv or json. TSV is currently broken and
# will remain so until someone accepts reports in that format.
# FIXME: "/tmp" is fine for a quick test but pick another directory that
# the "counter" user can write to and that the "dataverse" user can read from.
output_file: '/tmp/make-data-count-report'
output_format: 'json'

# The name of the platform that goes into your reports.
# FIXME: Change "platform" to match the name of your Dataverse installation.
# Examples are "Harvard Dataverse" for Harvard University or "LibraData" for
# the University of Virginia.
platform: 'LibreScholar'

# Don't put your API token in here if you're going to commit this file. Put it
# in a separate secrets.yaml in the same directory as the config, or else set
# an environment variable when starting up in order to override the key.
# YAML key/values set in secrets.yaml will override the ones from the main config.
hub_api_token: 'set_me_in_secrets'
# The test metrics URL is only for testing.
# hub_base_url: https://metrics.test.datacite.org
# FIXME: change "hub_base_url" to https://api.datacite.org (production) once you have credentials.
hub_base_url: 'https://api.test.datacite.org'
# FIXME: Change "upload_to_hub" to true when you're ready.
upload_to_hub: false

# only use this to simulate running on a date besides today
# simulate_date: 2019-01-12

# Path to the MaxMind GeoLite2 country database file (.mmdb).
# NOTE(review): this is a relative path; confirm which directory it is
# resolved against.
maxmind_geoip_country_path: 'maxmind_geoip/GeoLite2-Country.mmdb'