Source code for pacifica.metadata.rest.reporting_queries.summarize_by_date

#!/usr/bin/python
# -*- coding: utf-8 -*-
"""CherryPy Status Metadata object class."""
import datetime
import pytz
from dateutil.parser import parse
from cherrypy import tools, request
from peewee import Expression, OP
from pacifica.metadata.rest.reporting_queries.query_base import QueryBase
from pacifica.metadata.orm import TransSIP, Files
from pacifica.metadata.orm.base import db_connection_decorator


# pylint: disable=too-few-public-methods
class SummarizeByDate(QueryBase):
    """Summarize uploaded files by date for instruments, projects or users.

    Exposed as a CherryPy resource. ``POST`` is the HTTP entry point; the
    other static methods are helpers that build the summary it returns.
    """

    exposed = True

    # pylint: disable=too-many-locals
    @staticmethod
    def _search_by_dates(object_type, object_id_list, start_date, end_date,
                         time_basis):
        """Query files in a date window and fold them into a summary dict.

        Args:
            object_type: key into ``QueryBase.object_type_mappings`` naming
                the ``TransSIP`` column the IDs are matched against.
            object_id_list: IDs to match (used in an ``IN`` clause).
            start_date: inclusive lower bound for the time column.
            end_date: inclusive upper bound for the time column.
            time_basis: key into ``QueryBase.time_basis_mappings``;
                ``'submitted'`` reads the column from ``TransSIP``, any
                other value reads it from ``Files``.

        Returns:
            dict with the keys ``day_graph``, ``summary_totals`` and
            ``transaction_info``.
        """
        time_column_name = QueryBase.time_basis_mappings.get(time_basis)
        object_type_column_name = QueryBase.object_type_mappings.get(
            object_type)
        if time_basis == 'submitted':
            time_column = getattr(TransSIP, time_column_name)
        else:
            time_column = getattr(Files, time_column_name)
        object_type_column = getattr(TransSIP, object_type_column_name)

        where_clause = Expression(time_column, OP.GTE, start_date)
        where_clause &= Expression(time_column, OP.LTE, end_date)
        where_clause &= (object_type_column << object_id_list)
        query = Files().select(
            Files.id,
            time_column.alias('filedate'),
            Files.size,
            Files.transaction
        ).join(TransSIP, on=(TransSIP.id == Files.transaction))
        # pylint: disable=no-member
        query = query.where(where_clause).order_by(time_column).objects()
        # pylint: enable=no-member

        results = {
            'day_graph': {
                'by_date': {
                    'available_dates': {},
                    'file_count': {},
                    'file_volume': {},
                    'transactions': {},
                    'file_volume_array': {},
                    'transaction_count_array': {}
                }
            },
            'summary_totals': {
                'upload_stats': {
                    'project': {},
                    'instrument': {},
                    'user': {}
                },
                'total_file_count': 0,
                'total_size_bytes': 0,
                'total_size_string': ''
            },
            'transaction_info': {
                'transaction': {},
                'project': {},
                'instrument': {},
                'user': {}
            }
        }

        # Many files share one transaction; hash each transaction only once.
        transaction_cache = {}
        for item in query.iterator():
            if item.transaction_id not in transaction_cache:
                t_info = item.transaction.transsip.get().to_hash()
                transaction_cache[item.transaction_id] = t_info
            else:
                t_info = transaction_cache[item.transaction_id]
            SummarizeByDate._summarize_by_date(
                results['day_graph']['by_date'], item)
            SummarizeByDate._update_transaction_info_block(
                results['transaction_info'], item, t_info)
            SummarizeByDate._summarize_upload_stats(
                results['summary_totals']['upload_stats'], t_info)
            results['summary_totals']['total_file_count'] += 1
            results['summary_totals']['total_size_bytes'] += item.size
        return results
    # pylint: enable=too-many-locals

    @staticmethod
    def _update_transaction_info_block(info_block, item, t_info):
        """Record display names for the project, instrument and submitter.

        Mutates ``info_block`` in place, adding an entry only for IDs that
        are not recorded yet.

        Args:
            info_block: dict with ``project``, ``instrument`` and ``user``
                sub-dicts mapping an ID to a display string.
            item: file row whose ``transaction.transsip`` is looked up.
            t_info: hashed transaction with ``project``, ``instrument`` and
                ``submitter`` keys.
        """
        proj = t_info['project']
        inst = t_info['instrument']
        submitter = t_info['submitter']

        need_proj = proj not in info_block['project']
        need_inst = inst not in info_block['instrument']
        need_user = submitter not in info_block['user']
        if not (need_proj or need_inst or need_user):
            # Everything is already recorded; skip the database lookup.
            return

        transsip = item.transaction.transsip.get()
        if need_proj:
            info_block['project'][proj] = transsip.project.title
        if need_inst:
            info_block['instrument'][inst] = transsip.instrument.name
        if need_user:
            info_block['user'][submitter] = u'{0} {1}'.format(
                transsip.submitter.first_name,
                transsip.submitter.last_name
            )

    @staticmethod
    def _summarize_upload_stats(upload_stats_block, transaction_info):
        """Increment the per-project/instrument/user counters by one.

        Mutates ``upload_stats_block`` in place.

        Args:
            upload_stats_block: dict with ``project``, ``instrument`` and
                ``user`` sub-dicts mapping an ID to a count.
            transaction_info: dict with ``project``, ``instrument`` and
                ``submitter`` keys.
        """
        # The stats block files the submitter under 'user'.
        key_pairs = (
            ('project', 'project'),
            ('instrument', 'instrument'),
            ('user', 'submitter'),
        )
        for stats_key, info_key in key_pairs:
            counts = upload_stats_block[stats_key]
            object_id = transaction_info[info_key]
            counts[object_id] = counts.get(object_id, 0) + 1

    @staticmethod
    def _summarize_by_date(summary_block, item):
        """Add one file row to the per-day counts, volumes and transactions.

        Mutates ``summary_block`` in place. The day key is the local date of
        ``item.filedate`` formatted as ``YYYY-MM-DD``.

        Args:
            summary_block: dict with ``file_count``, ``file_volume`` and
                ``transactions`` sub-dicts keyed by day string.
            item: file row providing ``filedate``, ``size`` and
                ``transaction_id``.
        """
        current_day = SummarizeByDate.utc_to_local(item.filedate).date()
        current_day = current_day.strftime('%Y-%m-%d')

        file_count = summary_block['file_count']
        file_volume = summary_block['file_volume']
        file_count[current_day] = file_count.get(current_day, 0) + 1
        file_volume[current_day] = (
            file_volume.get(current_day, 0) + item.size)

        day_transactions = summary_block['transactions'].setdefault(
            current_day, [])
        # Use transaction_id for both the check and the append so the list
        # never holds a value the membership test would not recognise.
        if item.transaction_id not in day_transactions:
            day_transactions.append(item.transaction_id)

    @staticmethod
    def local_to_utc(local_datetime_obj):
        """Return a TZ corrected datetime object.

        Localizes the datetime with ``QueryBase.local_timezone`` and converts
        the result to UTC.
        """
        utc_datetime_obj = QueryBase.local_timezone.localize(
            local_datetime_obj)
        return utc_datetime_obj.astimezone(pytz.utc)

    @staticmethod
    def utc_to_local(utc_datetime_obj):
        """Return a TZ corrected datetime object.

        Localizes the datetime as UTC and converts the result to
        ``QueryBase.local_timezone``.
        """
        local_datetime_obj = pytz.utc.localize(utc_datetime_obj)
        return local_datetime_obj.astimezone(QueryBase.local_timezone)

    @staticmethod
    def _canonicalize_dates(start_date, end_date):
        """Parse the date bounds and return them as UTC ISO-8601 strings.

        An unparseable or missing start date falls back to ``1997-01-01``;
        an unparseable or missing end date falls back to the current time.

        Args:
            start_date: date string, or None.
            end_date: date string, or None.

        Returns:
            tuple ``(start_iso, end_iso)``.
        """
        # parse() raises TypeError for non-string input such as None and
        # OverflowError for out-of-range values, not only ValueError.
        # NOTE(review): local_to_utc presumably raises ValueError for
        # tz-aware input, which would also trigger the fallback - confirm.
        parse_errors = (ValueError, TypeError, OverflowError)
        try:
            start_date_obj = SummarizeByDate.local_to_utc(parse(start_date))
        except parse_errors:
            start_date_obj = SummarizeByDate.local_to_utc(parse('1997-01-01'))
        try:
            end_date_obj = SummarizeByDate.local_to_utc(parse(end_date))
        except parse_errors:
            end_date_obj = SummarizeByDate.local_to_utc(
                datetime.datetime.now())
        return start_date_obj.isoformat(), end_date_obj.isoformat()

    # Cherrypy requires these named methods.
    # pylint: disable=invalid-name
    @staticmethod
    @tools.json_in()
    @tools.json_out()
    @db_connection_decorator
    def POST(time_basis=None, object_type=None, start_date=None,
             end_date=None):
        """Return summary info for a given object type/id/time range combo.

        The JSON request body is passed on as the list of object IDs.

        Args:
            time_basis: matched on its first five characters against
                ``creat``/``modif``/``submi``; anything else, including
                None, means ``'modified'``.
            object_type: ``'instrument'``, ``'project'`` or ``'user'``;
                anything else, including None, means ``'instrument'``.
            start_date: lower date bound, see ``_canonicalize_dates``.
            end_date: upper date bound, see ``_canonicalize_dates``.
        """
        # check time basis validity
        time_basis_list = {
            'creat': 'created',
            'modif': 'modified',
            'submi': 'submitted'
        }
        # Test for None before slicing; slicing None raises TypeError.
        if time_basis is None:
            time_basis = 'modified'
        else:
            time_basis = time_basis_list.get(time_basis[0:5], 'modified')

        object_type_list = ['instrument', 'project', 'user']
        if object_type not in object_type_list:
            object_type = 'instrument'

        # check start/end date validity
        start_date, end_date = SummarizeByDate._canonicalize_dates(
            start_date, end_date)
        return SummarizeByDate._search_by_dates(
            object_type, request.json, start_date, end_date, time_basis)