'''
Functions that process row data are placed here.
'''
from apm_helpers.decorator import cached_by_key_column_and_custom_fields as cached_extra,cached_by_key_column,cached_by_unique_index
from debug import myprint
from apm_helpers.messages import Messages as msg
from apm_helpers.justifiedbool import JustifiedBool
import functools
from collections import defaultdict
import itertools
import math
import re
def get_sum_metrics(row, type_, *metrics):
    '''
    Sum the named columns of a row after converting each with ``type_``.

    Columns that are missing from the row or hold a falsy value are skipped.
    A falsy row yields 0.0; a row with no usable column yields 0.
    '''
    if not row:
        return 0.0
    total = 0
    for column in metrics:
        if column in row and row[column]:
            total += type_(row[column])
    return total
# Module-wide switch read by bottomup_fallback, get_call_count and
# get_instruction_mix: when True, values are taken from the bottom-up row
# even if the top-down row has its own value.
_enforce_fallback=False
def set_no_stacks_mode(no_stacks_mode=True):
 '''
 Store ``no_stacks_mode`` in the module-level ``_enforce_fallback`` switch.
 '''
 global _enforce_fallback
 _enforce_fallback=no_stacks_mode
def bottomup_fallback(default=None,scale_by_time=True,postprocess=None):
 '''
 Build a decorator that lets a one-row metric getter fall back to the bottom-up row.

 The decorated ``func(row)`` returns the metric, or None when the row has no
 usable value. The produced wrapper has the signature
 ``wrapper(row, check_bottom_up=True)``.

 default: value used when no metric could be obtained (falsy row, missing
  bottom-up row, or func returned None).
 scale_by_time: when the value is taken from the bottom-up row, multiply it by
  row['total_time'] / bu_row['total_time'].
 postprocess: optional callable applied to the final value before it is returned.
 '''
 def decorator(func):
  @functools.wraps(func)
  def wrapper(row,check_bottom_up=True):
   ret=default
   if row:
    # func is always evaluated on the given row first.
    res=func(row)
    if row.is_bottomup:
     # Bottom-up rows have nothing to fall back to.
     ret=res if res is not None else default
    elif res is not None and not _enforce_fallback:
     ret=res
    elif(check_bottom_up or _enforce_fallback):
     # Either the row has no value of its own or fallback is enforced globally.
     bu_row=get_bottom_up(row)
     if bu_row is None:
      ret=default
     else:
      bu_res=func(bu_row)
      if bu_res is None:
       ret=default
      elif not scale_by_time:
       ret=bu_res
      else:
       try:
        multiplier=float(row['total_time'])/float(bu_row['total_time'])
        try:
         # Iterable results are scaled element-wise and returned as a tuple.
         ret=tuple(x*multiplier for x in bu_res)
        except TypeError:
         # Not iterable: treat the result as a scalar.
         ret=bu_res*multiplier
       except(ValueError,ZeroDivisionError):
        # Times are unparsable or zero: use the unscaled bottom-up value.
        ret=bu_res
   return postprocess(ret)if postprocess else ret
  return wrapper
 return decorator
@cached_by_key_column
def is_executed(row):
    '''
    Return False when the row's 'type' column contains '[Not Executed]', True otherwise.
    '''
    row_type = row['type']
    if row_type and '[Not Executed]' in row_type:
        return False
    return True
@cached_by_key_column
def is_completely_unrolled(row):
    '''
    Tell whether the row's 'type' column contains 'Completely Unrolled'.

    A falsy 'type' value is returned unchanged instead of False.
    '''
    row_type = row['type']
    if not row_type:
        return row_type
    return 'Completely Unrolled' in row_type
@cached_by_key_column
def is_scalar(row):
    '''
    Tell whether the row's 'type' column is set and does not contain 'Vector'.

    A falsy 'type' value is returned unchanged instead of False.
    '''
    row_type = row['type']
    if not row_type:
        return row_type
    return 'Vector' not in row_type
@cached_by_key_column
def is_function(row):
    '''
    Return True when the row has a 'type' column containing 'function' (case-insensitive).
    '''
    if 'type' not in row:
        return False
    return 'function' in row['type'].lower()
@cached_by_key_column
def is_loop(row):
    '''
    Return True when the row's name column exists and contains '[loop in'.
    '''
    name_column = 'function_call_sites_and_loops'
    if name_column not in row:
        return False
    return '[loop in' in row[name_column]
@cached_by_key_column
def is_inlined_function(row):
    '''
    Return True when the row has a 'type' column containing 'inlined function' (case-insensitive).
    '''
    if 'type' not in row:
        return False
    return 'inlined function' in row['type'].lower()
@cached_by_key_column
def is_vector_function(row):
    '''
    Return True when the row has a 'type' column containing 'vector function' (case-insensitive).
    '''
    if 'type' not in row:
        return False
    return 'vector function' in row['type'].lower()
def get_bottom_up(row):
    '''
    Return the bottom-up counterpart of a row.

    A bottom-up row is returned as is; otherwise the first item produced by
    ``row.sync`` is returned, or None when there is none.
    '''
    return row if row.is_bottomup else next(row.sync, None)
@cached_by_key_column
def get_loop_size(row):
    '''
    Return 'static_all_instructions' of the bottom-up row multiplied by 16.

    Returns 0 when there is no bottom-up row or the column is empty or not an
    integer.
    '''
    size_per_instruction = 16
    bu_row = get_bottom_up(row)
    if not bu_row:
        return 0
    instruction_count = bu_row['static_all_instructions']
    if instruction_count == '':
        return 0
    try:
        return int(instruction_count) * size_per_instruction
    except ValueError:
        return 0
@cached_by_key_column
def get_memory_footprint(row):
    '''
    Return the larger of the estimated/maximum and first-instance footprints.

    Values are read from the bottom-up row and converted with
    memsize_string_to_bytes; a conversion that raises ValueError counts as 0.
    Returns 0 when there is no bottom-up row.
    '''
    # NOTE(review): memsize_string_to_bytes is not defined or imported in the
    # visible part of this file - confirm where it comes from.
    bu_row = get_bottom_up(row)
    if not bu_row:
        return 0
    # Prefer the estimate column; fall back to the per-site maximum when absent.
    try:
        footprint_text = bu_row['footprint_estimate']
    except KeyError:
        footprint_text = bu_row['maximum_site_footprint']
    try:
        max_footprint = memsize_string_to_bytes(footprint_text)
    except ValueError:
        max_footprint = 0
    try:
        first_footprint = memsize_string_to_bytes(bu_row['first_instance_site_footprint'])
    except ValueError:
        first_footprint = 0
    return max(first_footprint, max_footprint)
@cached_by_key_column
def get_bytes_transferred(row):
    '''
    Return the offload transfer volume recorded on the bottom-up row.

    A positive 'offload_shared' value wins; otherwise the sum of
    'offload_host_to_device' and 'offload_device_to_host' is returned.
    Missing or unparsable columns count as 0.0. Returns 0 without a
    bottom-up row.
    '''
    bu_row = get_bottom_up(row)
    if not bu_row:
        return 0

    def read_float(column):
        try:
            return float(bu_row[column])
        except (ValueError, TypeError, KeyError):
            return 0.0

    shared = read_float("offload_shared")
    if shared > 0.0:
        return shared
    incoming = read_float("offload_host_to_device")
    outgoing = read_float("offload_device_to_host")
    return incoming + outgoing
@cached_by_key_column
def get_data_transferred(row):
    '''
    Return a dict with the 'shared', 'in', 'out' and 'inout' transfer volumes.

    'shared', 'in' and 'out' come from the offload_* columns of the bottom-up
    row (0.0 when missing or unparsable); 'inout' is shared - in - out.
    All values are 0.0 when there is no bottom-up row.
    '''
    bu_row = get_bottom_up(row)
    if not bu_row:
        return {'shared': 0.0, 'in': 0.0, 'out': 0.0, 'inout': 0.0}
    sources = (
        ('shared', "offload_shared"),
        ('in', "offload_host_to_device"),
        ('out', "offload_device_to_host"),
    )
    transferred = {}
    for name, column in sources:
        try:
            transferred[name] = float(bu_row[column])
        except (ValueError, TypeError, KeyError):
            transferred[name] = 0.0
    transferred['inout'] = transferred['shared'] - transferred['in'] - transferred['out']
    return transferred
@cached_by_key_column
def get_vector_length(row):
    '''
    Return ``(vector_length, possible_vector_lengths)`` for a row.

    Top-down inlined and vector function rows delegate to their parent and
    raise Exception when they have none. 'vector_length' is parsed either as
    one integer (the list is then empty) or as a ';'-separated list whose
    maximum is used. Unparsable text gives ``(1, [])``.
    '''
    if not row.is_bottomup:
        if is_inlined_function(row):
            if row.parent is None:
                raise Exception(msg.INTERNAL_ERR_PARENT_NOT_FOUND_FOR_INLINED_FUNC.format(row['function_call_sites_and_loops']))
            return get_vector_length(row.parent)
        if is_vector_function(row):
            if row.parent is None:
                raise Exception(msg.INTERNAL_ERR_PARENT_NOT_FOUND_FOR_VECTOR_FUNC.format(row['function_call_sites_and_loops']))
            return get_vector_length(row.parent)
    raw_length = row['vector_length']
    try:
        return int(raw_length), []
    except ValueError:
        pass
    try:
        candidates = [int(part) for part in raw_length.split(';')]
    except ValueError:
        return 1, []
    return max(candidates), candidates
def _get_call_count(row):
    '''
    Parse the row's own 'call_count' column.

    The value is either one number or a ';'-separated list that is summed.
    Returns the float count when it is at least 1.0, otherwise None (empty,
    unparsable, or below 1.0).
    '''
    raw_count = row['call_count']
    if not raw_count:
        return None
    try:
        count = float(raw_count)
    except ValueError:
        try:
            count = sum(float(part) for part in raw_count.split(';'))
        except ValueError:
            count = 0.0
    return count if count >= 1.0 else None
@cached_extra('check_bottom_up')
def get_call_count(row,check_bottom_up=True):
 '''
 Return the call count of a row, always at least 1.0.

 The row's own 'call_count' is used when present. For top-down rows without
 one (and check_bottom_up set), or whenever fallback is enforced globally,
 the count is derived from the bottom-up row or from the parent's execution
 count.
 '''
 res=_get_call_count(row)
 # Precedence: (res is None and check_bottom_up) or _enforce_fallback.
 if not row.is_bottomup and(res is None and check_bottom_up or _enforce_fallback):
  def get_parent_ex_count(parent):
   # Execution count of a parent: call count for functions, total trip
   # count otherwise; 1.0 when there is no parent.
   if parent:
    if is_function(parent):
     return get_call_count(parent)
    else:
     return get_trip_count_total(parent)
   return 1.0
  use_parent_ex_count=False
  if is_inlined_function(row):
   use_parent_ex_count=True
  else:
   bu_row=get_bottom_up(row)
   if bu_row:
    if len(list(bu_row.sync))>1:
     # The bottom-up row is synced with several rows, so its count is
     # scaled down to this row's share.
     if row.parent:
      bu_call_count=_get_call_count(bu_row)
      if is_function(row):
       # NOTE(review): unlike bottomup_fallback, this ratio is not guarded
       # against unparsable or zero 'total_time' values.
       multiplier=float(row['total_time'])/float(bu_row['total_time'])
      else:
       multiplier=get_parent_ex_count(row.parent)/get_parent_ex_count(get_bottom_up(row.parent))
      res=bu_call_count*multiplier if bu_call_count else 1.0
    else:
     res=_get_call_count(bu_row)
   else:
    use_parent_ex_count=True
  if use_parent_ex_count or res is None:
   res=get_parent_ex_count(row.parent)
 # Clamp: missing values and values below 1.0 become 1.0.
 return res if res and res>=1.0 else 1.0
@cached_extra('check_bottom_up')
@bottomup_fallback(default=1.0, postprocess=lambda x: x if x >= 1 else 1.0)
def get_trip_count_total(row):
    '''
    Return 'trip_count_total' of the row as a float, or None when empty or unparsable.

    The bottomup_fallback decorator supplies the bottom-up fallback, the 1.0
    default and the clamp to at least 1.0.
    '''
    raw_total = row['trip_count_total']
    if not raw_total:
        return None
    try:
        return float(raw_total)
    except ValueError:
        return None
@cached_by_key_column
@bottomup_fallback(default=1.0, scale_by_time=False)
def get_avg_trip_count(row):
    '''
    Return total trip count divided by call count, or 0.0 for rows that were not executed.

    Returns None when either count is unavailable or the call count is not
    positive, which lets the decorator fall back.
    '''
    if not is_executed(row):
        return 0.0
    total_trips = get_trip_count_total(row, True)
    calls = get_call_count(row, True)
    if total_trips is None or calls is None or not calls > 0.0:
        return None
    return total_trips / calls
@cached_extra('check_bottom_up')
@bottomup_fallback(default=1.0,postprocess=lambda x:x if x>=1 else 1.0)
def get_ex_count(row):
 '''
 Return the execution count of a row: its total trip count, else its call count.

 Returns None for counts below 1.0 so the bottomup_fallback decorator can
 fall back; the decorator also clamps the final value to at least 1.0.
 Raises Exception when an inlined function chain has no non-inlined ancestor.
 '''
 # Top-down inlined functions without counts of their own take the count of
 # the nearest ancestor that is not an inlined function.
 if not row.is_bottomup and is_inlined_function(row)and not(row['call_count']or row['trip_count_total']):
  curr=row
  while is_inlined_function(curr):
   if curr.parent:
    curr=curr.parent
   else:
    raise Exception(msg.INTERNAL_ERR_PARENT_NOT_FOUND_FOR_INLINED_FUNC.format(row['function_call_sites_and_loops']))
  return get_ex_count(curr,True)
 result=float(row['trip_count_total']or row['call_count']or '0.0')
 if not row.is_bottomup and get_ex_count.trim_parallel_libs:
  daal_threader_func_pattern=re.compile(r'daal::threader_func(_b)*\<.*\>')
  daal_loop_in_threader_for_pattern=re.compile(r'\[loop in _daal_threader_for(_blocked)*\]')
  row_name=row['function_call_sites_and_loops']
  if match_pattern(row_name,daal_loop_in_threader_for_pattern):
   # For a threader-for loop, sum the call counts of the threader_func rows
   # below it. The walk does not descend below a matched threader_func or
   # into a nested threader-for loop.
   threader_func_call_count_sum=0.0
   stack=list(row.children)
   while stack:
    successor=stack.pop()
    successor_name=successor['function_call_sites_and_loops']
    if match_pattern(successor_name,daal_threader_func_pattern):
     threader_func_call_count_sum+=float(successor['call_count']or '0.0')
    elif not match_pattern(successor_name,daal_loop_in_threader_for_pattern):
     for child in successor.children:
      stack.append(child)
   result=max(result,threader_func_call_count_sum)
 return result if result>=1.0 else None
# Switch for the threader-for adjustment above.
get_ex_count.trim_parallel_libs=True
@cached_by_key_column
@bottomup_fallback(default=1.0)
def get_calls_per_iteration(row):
    '''
    Return how many times the row executes per execution of its parent.

    The ratio is only computed when the parent has a 'type' that does not
    contain 'Function'. The result is floored and never below 1.0.
    '''
    executions = get_ex_count(row, True)
    parent = row.parent
    ratio = 1.0
    if parent and parent['type'] and 'Function' not in parent['type']:
        ratio = executions / get_ex_count(parent, True)
    if ratio > 1.0:
        return math.floor(ratio)
    return 1.0
@cached_extra('relaxed')
def is_offload_candidate(row,relaxed=False):
 '''
 Decide whether a row may be offloaded and collect the reasons when it may not.

 Returns JustifiedBool(candidate, reasons). Checks run in order and stop at
 the first failure. With ``relaxed`` set, only the executed, bottom-up-row
 and instruction-mix checks are applied; otherwise the user-function and
 execution-count checks run as well.
 '''
 key=row['key_column']
 reasons=[]
 candidate=True
 bu_row=get_bottom_up(row)
 bu_key=bu_row['key_column']if bu_row else ''
 if relaxed:
  if not is_executed(row):
   myprint(msg.DEBUG_NOT_AN_OFFLOAD__NOT_EXECUTED.format(key))
   candidate=False
   reasons.append(msg.NonOffload.Reasons.NOT_EXECUTED)
  elif not bu_row:
   myprint(msg.DEBUG_NOT_AN_OFFLOAD__NOT_IN_BOTTOMUP_TABLE.format(key))
   candidate=False
   reasons.append(msg.NonOffload.Reasons.INTERNAL_ERROR)
  # At least one static instruction-mix column must be set.
  elif not(bu_row['static_compute']or bu_row['static_compute_with_memory']or bu_row['static_loaded_bytes']or bu_row['static_stored_bytes']or bu_row['static_loads']or bu_row['static_stores']):
   myprint(msg.DEBUG_NOT_AN_OFFLOAD__NO_INSTRUCTION_MIX.format(key,bu_key))
   candidate=False
   reasons.append(msg.NonOffload.Reasons.INTERNAL_ERROR)
  return JustifiedBool(candidate,reasons)
 if not is_executed(row):
  myprint(msg.DEBUG_NOT_AN_OFFLOAD__NOT_EXECUTED.format(key))
  candidate=False
  reasons.append(msg.NonOffload.Reasons.NOT_EXECUTED)
 elif not is_user_function(row):
  myprint(msg.DEBUG_NOT_AN_OFFLOAD__SYSTEM_MODULE.format(key))
  candidate=False
  reasons.append(msg.NonOffload.Reasons.SYSTEM_MODULE)
 elif not bu_row:
  myprint(msg.DEBUG_NOT_AN_OFFLOAD__NOT_IN_BOTTOMUP_TABLE.format(key))
  candidate=False
  reasons.append(msg.NonOffload.Reasons.INTERNAL_ERROR)
 # At least one static instruction-mix column must be set.
 elif not(bu_row['static_compute']or bu_row['static_compute_with_memory']or bu_row['static_loaded_bytes']or bu_row['static_stored_bytes']or bu_row['static_loads']or bu_row['static_stores']):
  myprint(msg.DEBUG_NOT_AN_OFFLOAD__NO_INSTRUCTION_MIX.format(key,bu_key))
  candidate=False
  reasons.append(msg.NonOffload.Reasons.INTERNAL_ERROR)
 # A call count is always required; a trip count is required unless the
 # bottom-up row's type contains 'Function'.
 elif not bu_row['call_count']or(not bu_row['trip_count_total']and not 'Function' in bu_row['type']):
  myprint(msg.DEBUG_NOT_AN_OFFLOAD__NO_EXECUTION_COUNT.format(key,bu_key))
  candidate=False
  reasons.append(msg.NonOffload.Reasons.NO_EXEC_COUNT)
 # NOTE(review): int() raises ValueError for non-integer text such as '1.0'
 # or '1;2', which _get_call_count accepts - confirm these columns are
 # always plain integers here.
 elif bu_row['call_count']and int(bu_row['call_count'])==0 or bu_row['trip_count_total']and int(bu_row['trip_count_total'])==0:
  myprint(msg.DEBUG_NOT_AN_OFFLOAD__ZERO_EXECUTION_COUNT.format(key,bu_key))
  candidate=False
  reasons.append(msg.NonOffload.Reasons.NO_EXEC_COUNT)
 return JustifiedBool(candidate,reasons)
@cached_extra('field')
def read_map_value(row, field):
    '''
    Read ``field`` from the first item of ``row.map`` as a float.

    The value is either one number or a ';'-separated list that is summed.
    Returns 0.0 when there is no item, the value is falsy, or it cannot be
    parsed. Only the first item is ever looked at.
    '''
    for site in row.map:
        raw_value = site[field]
        if not raw_value:
            return 0.0
        try:
            return float(raw_value)
        except ValueError:
            try:
                return sum([float(part) for part in raw_value.split(';')])
            except ValueError:
                return 0.0
    return 0.0
@cached_extra('field')
def get_map_data(row, field):
    '''
    Return a map metric for a row, averaged per call when a total is available.

    When the row has an 'all_<field>' column and a 'call_count', the result is
    the 'all_<field>' map value divided by the call count. Otherwise, or when
    that computation fails or the call count is zero, the plain ``field`` map
    value is returned. A falsy row yields 0.0.
    '''
    if not row:
        return 0.0
    total_field = 'all_' + field
    if total_field in row and row['call_count']:
        try:
            total = read_map_value(row, total_field)
            call_count = float(row['call_count'])
            if call_count:
                return total / call_count
        except Exception:
            # Best effort: any failure falls through to the plain field.
            # Exception (not a bare except) keeps KeyboardInterrupt and
            # SystemExit propagating.
            pass
    return read_map_value(row, field)
@cached_by_key_column
def get_variable_count(row):
    '''
    Collect the variables referenced by the problems of all map sites of a row.

    Returns ``{key_column: [unique_variable_names, parent_id, loop_height]}``,
    or an empty dict when no problem has 'variable_references'. References are
    ','-separated; the order of the de-duplicated names is unspecified.
    '''
    references = []
    for site in row.map:
        for problem in site.problems:
            referenced = problem['variable_references']
            if referenced:
                references.extend(referenced.split(','))
    if not references:
        return {}
    return {row['key_column']: [list(set(references)), row['parent_id'], row['loop_height']]}
def get_rfo_misses(row):
 '''Return the 'rfo_cache_misses' map metric of the row (see get_map_data).'''
 return get_map_data(row,'rfo_cache_misses')
def get_dirty_evictions(row):
 '''Return the 'dirty_evictions' map metric of the row (see get_map_data).'''
 return get_map_data(row,'dirty_evictions')
def get_cache_misses(row):
 '''Return the 'cache_misses' map metric of the row (see get_map_data).'''
 return get_map_data(row,'cache_misses')
def get_fit_into_cache(row):
 '''Return the 'fit_into_cache' map metric of the row (see get_map_data).'''
 return get_map_data(row,'fit_into_cache')
def get_total_strides(row):
    '''
    Return the stride counters of the row's bottom-up counterpart.

    The result is ``[read, write]`` where each item is the list
    ``[unit, constant, non_unit]`` of floats read from the
    '<access>_<kind>_strides' columns. Returns None when there is no
    bottom-up row or any counter is missing or unparsable.
    '''
    bu_row = get_bottom_up(row)
    stride_access_types = ('read', 'write')
    stride_types = ('unit', 'constant', 'non_unit')
    try:
        return [
            [float(bu_row['{}_{}_strides'.format(access, kind)]) for kind in stride_types]
            for access in stride_access_types
        ]
    except Exception:
        # Best effort: any lookup or conversion failure means "no stride data".
        # Exception (not a bare except) keeps KeyboardInterrupt and SystemExit
        # propagating.
        return None
def is_mem_stat_valid(mem_stat):
    '''
    Return True when no value of the mapping is 'no data' or the empty string.
    '''
    values = mem_stat.values()
    return not ('no data' in values or '' in values)
@cached_by_key_column
def is_map_row_valid(row):
    '''
    Tell whether the row has valid memory statistics.

    When get_mem_stat gives a falsy result, that result is returned unchanged
    rather than False.
    '''
    mem_stat = get_mem_stat(row)
    if not mem_stat:
        return mem_stat
    return is_mem_stat_valid(mem_stat)
@cached_by_key_column
def get_mem_stat(row):
    '''
    Return the memory statistics built from the first map item of the bottom-up row.

    Returns None when there is no bottom-up row, no map item, or the
    statistics are not valid. Only the first map item is considered.
    '''
    bu_row = get_bottom_up(row)
    if not bu_row:
        return None
    for map_row in bu_row.map:
        mem_stat = get_mem_stat_internal(bu_row, map_row)
        return mem_stat if is_mem_stat_valid(mem_stat) else None
    return None
def get_eliminated_metrics(row, loads, stores):
    '''
    Return the row's own ``(loads + stores, loads, stores)`` for two memory-statistic keys.

    The values of the direct children that have memory statistics are
    subtracted from the row's values and the differences are clamped at 0.
    Returns ``(0, 0, 0)`` when the row has no memory statistics.
    '''
    mem_stat = get_mem_stat(row)
    if not mem_stat:
        return 0, 0, 0
    children_loads = 0.0
    children_stores = 0.0
    for child in row.get_children():
        child_stat = get_mem_stat(child)
        if child_stat:
            children_loads += float(child_stat[loads])
            children_stores += float(child_stat[stores])
    own_loads = max(float(mem_stat[loads]) - children_loads, 0)
    own_stores = max(float(mem_stat[stores]) - children_stores, 0)
    return own_loads + own_stores, own_loads, own_stores
@cached_by_key_column
def get_eliminated_bytes(row):
 '''Return (total, loaded, stored) eliminated bytes of the row via get_eliminated_metrics.'''
 return get_eliminated_metrics(row,'eliminated_loaded_bytes','eliminated_stored_bytes')
@cached_by_key_column
def get_eliminated_access(row):
 '''Return (total, loads, stores) eliminated accesses of the row via get_eliminated_metrics.'''
 return get_eliminated_metrics(row,'eliminated_loads','eliminated_stores')
@cached_by_key_column
def get_mem_usage(top_row):
 '''Return the memory statistics of the row; thin alias of get_mem_stat.'''
 return get_mem_stat(top_row)
@cached_extra('map_row')
def get_mem_stat_internal(bu_row, map_row):
    '''
    Build the memory statistics of a bottom-up row from one map item.

    Rows that were not executed get the shared ``map_no_mem_data`` dict and
    function rows the shared ``map_undefined_mem_data`` dict (both are module
    objects, not copies). Otherwise every column of ``map_columns`` is read
    from ``map_row`` and multiplied by the row's call count; values that are
    not numbers become 'no data'.
    '''
    if not is_executed(bu_row):
        return map_no_mem_data
    if is_function(bu_row):
        return map_undefined_mem_data
    call_count = get_call_count(bu_row)
    scaled = {}
    for column in map_columns:
        try:
            scaled[column] = float(map_row[column]) * call_count
        except ValueError:
            scaled[column] = 'no data'
    return scaled
# Memory-statistic column names are '<type>_<category>', grouped by category.
map_categories = [
    'loads',
    'stores',
    'memory_operations',
    'loaded_bytes',
    'stored_bytes',
    'transferred_bytes',
]
map_types = ['total', 'expected', 'eliminated']
map_columns = [
    '{}_{}'.format(kind, category)
    for category in map_categories
    for kind in map_types
]
# Placeholder statistics: 'no data' for every column, and '' for every column.
map_no_mem_data = dict.fromkeys(map_columns, 'no data')
map_undefined_mem_data = dict.fromkeys(map_columns, '')
@cached_extra('disable_mem_filter')
def get_self_cache_access(row, disable_mem_filter):
    '''
    Return ``(loads + stores, loads, stores)`` of the row itself.

    'self_loads' and 'self_stores' come from the bottom-up row and stay 0.0
    when missing or unparsable. Unless ``disable_mem_filter`` is set, the
    eliminated loads and stores are subtracted and the results clamped at 0.
    '''
    bu_row = get_bottom_up(row)
    loads = 0.0
    stores = 0.0
    try:
        loads = float(bu_row['self_loads'])
        stores = float(bu_row['self_stores'])
    except (ValueError, KeyError, TypeError):
        pass
    if disable_mem_filter:
        eliminated_loads, eliminated_stores = 0.0, 0.0
    else:
        eliminated_loads, eliminated_stores = get_eliminated_access(row)[1:]
    loads = max(loads - eliminated_loads, 0)
    stores = max(stores - eliminated_stores, 0)
    return loads + stores, loads, stores
@cached_extra('disable_mem_filter')
def get_total_cache_access(row, disable_mem_filter):
    '''
    Return ``(loads + stores, loads, stores)`` summed over the row and all its descendants.
    '''
    total_rdwr, total_read, total_write = 0, 0, 0
    pending = [row]
    while pending:
        current = pending.pop()
        self_rdwr, self_read, self_write = get_self_cache_access(current, disable_mem_filter)
        total_rdwr += self_rdwr
        total_read += self_read
        total_write += self_write
        pending.extend(current.get_children())
    return total_rdwr, total_read, total_write
@cached_extra('accelerator')
def get_self_strides_distribution(row, accelerator):
    '''
    Return the read and write stride distributions of the row itself.

    The stride counters of the nearest descendants that have stride data are
    subtracted from the row's counters (clamped at 0). Each remaining counter
    list is then normalised into a dict keyed by
    ``accelerator.cache_access_types``. A list that sums to 0 gets
    ``accelerator.default_strides_distribution`` instead.

    Returns a 2-tuple of the default distribution when the row has no stride
    data, otherwise a list with one distribution per access type.
    '''
    fallback = accelerator.default_strides_distribution
    access_types = accelerator.cache_access_types
    own_strides = get_total_strides(row)
    if not own_strides:
        return fallback, fallback
    pending = list(row.children)
    while pending:
        node = pending.pop()
        nested = get_total_strides(node)
        if not nested:
            # No data on this node: look at its children instead.
            pending.extend(node.children)
            continue
        for i in range(len(own_strides)):
            for j in range(len(own_strides[0])):
                own_strides[i][j] = max(0., own_strides[i][j] - nested[i][j])
    distributions = []
    for counters in own_strides:
        total = sum(counters)
        if total > 0:
            distributions.append({name: counters[j] / total for j, name in enumerate(access_types)})
        else:
            distributions.append(fallback)
    return distributions
@cached_extra('memory_level')
def get_bw_usage(row, memory_level='1'):
    '''
    Return ``(loaded + stored, loaded, stored)`` traffic of the row for one memory level.

    memory_level is one of '1', '2', '3', '4' or 'external' and selects the
    'self_*_loaded_gb' / 'self_*_stored_gb' column pair. Column values are
    multiplied by 1.0e9. Values come from the row itself or, through
    bottomup_fallback, from its bottom-up row scaled by time. The result is
    ``(0.0, 0.0, 0.0)`` when no traffic is recorded.

    An unknown memory level is reported with a warning and yields
    ``(0.0, 0.0, 0.0)``.
    '''
    bw_data_by_memory_level = {
        '1': ('{}_{}_gb', 1.0e9),
        '2': ('{}_l2_{}_gb', 1.0e9),
        '3': ('{}_l3_{}_gb', 1.0e9),
        '4': ('{}_l4_{}_gb', 1.0e9),
        'external': ('{}_dram_{}_gb', 1.0e9),
    }
    try:
        bw_format, bw_multiplier = bw_data_by_memory_level[memory_level]
    except KeyError:
        myprint(msg.WARNING_UNKNOWN_MEMORY_LEVEL.format(memory_level), severity=3)
        # Without this return the code below would fail on the unbound
        # bw_format right after printing the warning.
        return 0.0, 0.0, 0.0
    ld_field, st_field = tuple(bw_format.format('self', direction) for direction in ('loaded', 'stored'))

    @bottomup_fallback(default=(0.0, 0.0, 0.0))
    def get_bw_usage_internal(row):
        res = []
        for field in ld_field, st_field:
            if not row or field not in row or row[field] == '':
                res.append(0.0)
            else:
                res.append(float(row[field]) * bw_multiplier)
        total = sum(res)
        if total == 0.0:
            # None lets bottomup_fallback try the bottom-up row.
            return None
        return total, res[0], res[1]

    res = get_bw_usage_internal(row, True)
    myprint(msg.DEBUG_BW_USAGE.format(memory_level, *res[1:]))
    return res
@cached_extra('memory_level')
def get_total_bw_usage(row, memory_level):
    '''
    Return ``(loaded + stored, loaded, stored)`` traffic summed over the row and all its descendants.
    '''
    total_bytes, total_loaded, total_stored = 0, 0, 0
    pending = [row]
    while pending:
        current = pending.pop()
        usage = get_bw_usage(current, memory_level)
        total_bytes += usage[0]
        total_loaded += usage[1]
        total_stored += usage[2]
        pending.extend(current.get_children())
    return total_bytes, total_loaded, total_stored
def is_compiler_proven_parallel(loop):
    '''
    Return True when the loop's 'vectorization_message_code' is one of the listed diagnostic codes.
    '''
    parallel_codes = (
        '15310', '15312', '15313', '15315', '15317', '15318', '15325', '15326',
        '15327', '15330', '15331', '15335', '15337', '15338', '15343', '15352',
        '15367', '15378', '15421', '15422', '15423', '15516', '15524', '15527',
        '15528', '15529', '15530', '15533', '15534', '15535', '15537',
    )
    code = loop['vectorization_message_code']
    return bool(code) and code in parallel_codes
def is_compiler_explicit_parallel(loop):
    '''
    Tell whether the loop's 'type' contains 'Vectorized' or 'Threaded'.

    A falsy 'type' value is returned unchanged instead of False.
    '''
    loop_type = loop['type']
    if not loop_type:
        return loop_type
    return 'Vectorized' in loop_type or 'Threaded' in loop_type
def is_math_module_assumed_parallel(loop):
    '''
    Return True when ``loop.module_type`` is 'MKL' or 'SVML'.
    '''
    math_module_types = ('MKL', 'SVML')
    return loop.module_type in math_module_types
@cached_by_key_column
def get_loop_occurences(row):
 '''Return the value registered for the row's key in ``loop_occurences``, or 1.0 when absent.'''
 return loop_occurences.get(row['key_column'],1.0)
# Registry keyed by 'key_column'; starts empty and is not filled in this
# module - presumably populated by callers (TODO confirm).
loop_occurences={}
@cached_by_key_column
def get_splitted_loop(row):
 '''Return the ``splitted_loops`` entry for the row's key; raises KeyError when there is none.'''
 return splitted_loops[row['key_column']]
@cached_by_key_column
def is_minor_loop(row):
 '''Return the ``is_minor`` attribute of the row's ``splitted_loops`` entry; raises KeyError when there is none.'''
 return splitted_loops[row['key_column']].is_minor
# Registry keyed by 'key_column'; starts empty and is not filled in this
# module - presumably populated by callers (TODO confirm).
splitted_loops={}
def is_loop_in_array(array, idx, lp_name=None):
    '''
    Return True when ``idx`` is in ``array``, or when ``lp_name`` is given and equals an item.

    A falsy ``array`` always gives False.
    '''
    if not array:
        return False
    if idx in array:
        return True
    if lp_name:
        return any(candidate == lp_name for candidate in array)
    return False
def fill_dependency_metrics(metrics, deptype, distance, key, specified_by):
    '''
    Store the dependency description of a loop in ``metrics`` (modified in place).

    Every entry is a ``(value, specified_by)`` pair. 'has_dependency' is True
    only when ``distance`` equals 1.
    '''
    entries = (
        ('dependent_iterations_distance', distance),
        ('has_dependency', distance == 1),
        ('dependency_type', deptype),
        ('dependency_key', key),
    )
    for name, value in entries:
        metrics[name] = (value, specified_by)
@cached_by_unique_index
def get_dependency_type(lp):
 '''
 Classify a loop as parallel or dependent.

 Returns ``(description, has_dependency)``. The checks below are ordered by
 priority. Falsy rows and function rows give ``('', False)``.
 '''
 if not lp or is_function(lp):
  return '',False
 lp_idx=lp['unique_index']
 lp_name=lp['source_location']
 # Module-level lists, matched by index or by source location.
 if is_loop_in_array(set_dependency,lp_idx,lp_name):
  return 'Dependency: User',True
 elif is_loop_in_array(set_parallel,lp_idx,lp_name):
  return 'Parallel: User',False
 elif is_compiler_proven_parallel(lp):
  return 'Parallel: Proven',False
 elif is_compiler_explicit_parallel(lp):
  return 'Parallel: Explicit',False
 elif is_loop_in_array(programming_model_parallel,lp_idx):
  return 'Parallel: Programming Model',False
 else:
  # Fall back to the loop's dependency sites.
  any_sites=False
  lp_type=''
  deps=0
  for site in lp.dependencies:
   any_sites=True
   if site['war_dependencies']:
    lp_type+='war:'
    deps+=1
   if site['waw_dependencies']:
    lp_type+='waw:'
    deps+=1
   if site['raw_dependencies']:
    lp_type+='raw:'
    deps+=1
   # A reduction is named in the description but does not increase deps, so
   # a reduction-only loop is reported with has_dependency False.
   if site['potential_reduction_column']:
    lp_type+='reduction:'
  if any_sites:
   if not lp_type:
    return 'Parallel: Workload',False
   else:
    return 'Dependency: '+lp_type,deps>0
  else:
   # No sites at all: the module-level assume_parallel flag decides.
   reason=('Dependency','Parallel')[assume_parallel]+': Assumed'
   return reason,not assume_parallel
def get_dependency_key(loop_type):
    '''
    Map a loop type description from get_dependency_type to its short key.

    Any description naming a war/waw/raw dependency maps to
    'dependency_waw_raw_war'; one naming only a reduction maps to
    'dependency_reduction'. Raises KeyError for unknown descriptions.
    '''
    keys_by_type = {
        "Parallel: User": "parallel_user",
        "Parallel: Proven": "parallel_proven",
        "Parallel: Explicit": "parallel_explicit",
        "Parallel: Workload": "parallel_workload",
        "Parallel: Assumed": "parallel_assumed",
        "Parallel: Programming Model": "parallel_programming_model",
        "Dependency: User": "dependency_user",
        "Dependency: reduction:": "dependency_reduction",
        "Dependency: waw:raw:war:": "dependency_waw_raw_war",
        "Dependency: Assumed": "dependency_assumed",
    }
    if any(tag in loop_type for tag in ("war:", "waw:", "raw:")):
        normalized = "Dependency: waw:raw:war:"
    elif "reduction:" in loop_type:
        normalized = "Dependency: reduction:"
    else:
        normalized = loop_type
    return keys_by_type[normalized]
@cached_by_key_column
def has_dependency(row):
 '''Return the has-dependency flag that get_dependency_type gives for the row's bottom-up row.'''
 bu_row=get_bottom_up(row)
 return get_dependency_type(bu_row)[1]
@cached_by_key_column
def get_loop_type(row):
 '''Return the loop type description that get_dependency_type gives for the row's bottom-up row.'''
 bu_row=get_bottom_up(row)
 return get_dependency_type(bu_row)[0]
# Settings read by get_dependency_type; not assigned anywhere else in this
# module - presumably set by callers (TODO confirm).
# set_parallel / set_dependency: loops matched by 'unique_index' or
# 'source_location' and reported as user-marked parallel / dependent.
set_parallel=None
set_dependency=None
# programming_model_parallel: loops matched by 'unique_index' only.
programming_model_parallel=None
# assume_parallel: verdict for loops that have no dependency sites.
assume_parallel=False
@cached_by_key_column
def get_location(row):
    '''
    Return the row's name; function rows with a source location get '<name> at <location>'.
    '''
    if not (is_function(row) and row['source_location']):
        return row['function_call_sites_and_loops']
    return '{} at {}'.format(row['function_call_sites_and_loops'], row['source_location'])
@cached_by_key_column
def is_innermost(row):
    '''
    Return True when ``row.get_children()`` yields no item.
    '''
    # Stops at the first child; the remaining children are never consumed.
    return not any(True for _ in row.get_children())
@cached_extra('library')
def is_in_library(row, library):
    '''
    Return True when a non-user row's 'library_name' equals ``library``, ignoring case.

    Rows classified as user functions always give False.
    '''
    if is_user_function(row):
        return False
    lib_name = row['library_name'].lower()
    return library.lower() == lib_name
class TotalMetricCalculator:
    '''
    Callable that sums a per-row metric over a row and all of its descendants.

    ``metric_getter(row, *args)`` returns the row's own metric or a falsy
    value. Totals are cached per ``(key_column, *args)`` for every row
    visited. Bottom-up rows are not traversed or cached: their own metric
    (or 0.0) is returned directly.
    '''

    def __init__(self, metric_getter):
        self._cache = {}
        self._get_metric = metric_getter

    def __call__(self, row, *args):
        extra = tuple(args)

        def cache_key(node):
            return (node['key_column'],) + extra

        totals = self._cache
        root_key = cache_key(row)
        if root_key in totals:
            return totals[root_key]
        if row.is_bottomup:
            return self._get_metric(row, *args) or 0.0

        # Each pending item is the path from the requested row down to a node,
        # so a node's metric can be added to every ancestor on that path.
        pending = [(row,)]
        while pending:
            path = pending.pop()
            node = path[-1]
            node_key = cache_key(node)
            if node_key in totals:
                # Total already known: credit it to the ancestors only.
                known_total = totals[node_key]
                for ancestor in path[:-1]:
                    totals[cache_key(ancestor)] += known_total
                continue
            totals[node_key] = 0.0
            own_metric = self._get_metric(node, *args)
            if own_metric:
                for member in path:
                    totals[cache_key(member)] += own_metric
            pending.extend(path + (child,) for child in node.children)
        return totals[root_key]
def time_in_library_getter(row, library):
    '''
    Return the row's 'self_time' as a float when the row belongs to ``library``.

    Returns None for rows outside the library and for unparsable times.
    '''
    if not is_in_library(row, library):
        return None
    try:
        return float(row['self_time'])
    except ValueError:
        return None


# Total 'self_time' spent in a library over a row and its descendants.
get_time_in_library = TotalMetricCalculator(time_in_library_getter)
def system_time_getter(row):
    '''
    Return the row's 'self_elapsed_time' as a float when the row is not a user function.

    Returns None for user functions and for unparsable times.
    '''
    if is_user_function(row):
        return None
    try:
        return float(row['self_elapsed_time'])
    except ValueError:
        return None


# Total 'self_elapsed_time' of non-user rows over a row and its descendants.
get_system_call_time = TotalMetricCalculator(system_time_getter)
@cached_by_key_column
def is_user_function(row):
    '''
    Tell whether a row is treated as user code.

    Rows whose 'function_type' parses to 0 are user code. A missing or
    non-integer 'function_type' gives False. Any other row is user code only
    when its name and module match one of the
    ``is_user_function.templates`` pattern pairs.
    '''
    try:
        if int(row['function_type']) == 0:
            return True
    except (KeyError, ValueError):
        return False
    for re_name, re_module in is_user_function.templates:
        if re_name.match(row['function_call_sites_and_loops']) and re_module.match(row['module']):
            return True
    return False


# (name pattern, module pattern) pairs that are treated as user code.
# Raw strings keep '\.' a regex escape instead of an invalid string escape.
is_user_function.templates = [
    (re.compile(name_pattern), re.compile(module_pattern))
    for name_pattern, module_pattern in (
        ('.*', r'^libm\.'),
        ('.*', r'^libsvml\.'),
    )
]
@cached_by_key_column
def get_basic_blocks(row):
    '''
    Return ``row.basic_blocks_lite``, or ``row.basic_blocks`` when reading the former
    raises AttributeError or KeyError. A falsy row gives an empty list.
    '''
    if row:
        try:
            blocks = row.basic_blocks_lite
        except (AttributeError, KeyError):
            blocks = row.basic_blocks
        return blocks
    return []
class AssemblyInstrMixStatus:
    '''
    Status codes returned by get_instruction_mix_from_assembly.

    UNKNOWN (0): assembly was not consulted.
    OK (1): the mix was built from assembly.
    ASSEMBLY_UNAVAILABLE (2): the row has no basic blocks.
    NO_BASIC_BLOCKS_EXECUTED (3): no basic block has a call count.
    '''
    (UNKNOWN,
     OK,
     ASSEMBLY_UNAVAILABLE,
     NO_BASIC_BLOCKS_EXECUTED) = range(4)
class InstructionMixType:
    '''
    Source of an instruction mix, as reported by get_instruction_mix.

    UNKNOWN (0): no mix was produced.
    ASSEMBLY (1): built from basic-block assembly.
    TD_DYNAMIC (2): 'self_*' columns of the top-down row.
    BU_STATIC (3): 'static_*' columns of the bottom-up row.
    BU_DYNAMIC (4): 'dynamic_*' columns of the bottom-up row.
    '''
    (UNKNOWN,
     ASSEMBLY,
     TD_DYNAMIC,
     BU_STATIC,
     BU_DYNAMIC) = range(5)
def get_instruction_mix(td_row, ignore_assembly=False):
    '''
    Return ``(instruction_mix, InstructionMixType, AssemblyInstrMixStatus)`` for a row.

    Assembly is tried first unless ``ignore_assembly`` is set. When it is not
    usable:
    - with fallback enforced, the 'dynamic' columns of the bottom-up row are
      used (empty mix of UNKNOWN type when there is no bottom-up row);
    - otherwise the 'self' columns of the given row are used, replaced by the
      'static' columns of the bottom-up row when they are all zero.
    '''
    asm_status = AssemblyInstrMixStatus.UNKNOWN
    if not ignore_assembly:
        asm_mix, asm_status = get_instruction_mix_from_assembly(td_row)
        if asm_status == AssemblyInstrMixStatus.OK:
            return asm_mix, InstructionMixType.ASSEMBLY, asm_status
    bu_row = get_bottom_up(td_row)
    if _enforce_fallback:
        if not bu_row:
            return {}, InstructionMixType.UNKNOWN, asm_status
        dynamic_mix = get_instruction_mix_from_row(bu_row, 'dynamic')
        return dynamic_mix, InstructionMixType.BU_DYNAMIC, asm_status
    self_mix = get_instruction_mix_from_row(td_row, 'self')
    if not any(self_mix.values()) and bu_row:
        static_mix = get_instruction_mix_from_row(bu_row, 'static')
        return static_mix, InstructionMixType.BU_STATIC, asm_status
    return self_mix, InstructionMixType.TD_DYNAMIC, asm_status
# COLUMNS: pairs of (mix key, column suffixes). A mix key is
# (category, data type, width, memory kind); the suffixes are prefixed with
# 'static_', 'self_' or 'dynamic_' and summed into that key.
get_instruction_mix.COLUMNS=[(('basic','float',32,'global'),('sp_compute_with_memory',)),(('basic','float',32,''),('sp_compute',)),(('basic','float',64,'global'),('dp_compute_with_memory',)),(('basic','float',64,''),('dp_compute',)),(('basic','int',32,'global'),('int_compute_with_memory',)),(('basic','int',32,''),('int_compute',)),(('mov','',32,'global'),('memory',)),(('mov','',64,'global'),tuple()),(('basic','other',32,'global'),('compute_with_memory',)),(('basic','other',32,''),('compute',)),]
# INSTRUCTIONS: operation name -> the category its counts are taken from
# ('decrease') and the data types tried, in order ('type').
get_instruction_mix.INSTRUCTIONS={'fma':{'decrease':'basic','type':('float',),},'add':{'decrease':'basic','type':('float','int',),},'mul':{'decrease':'basic','type':('float','int',),},'sub':{'decrease':'basic','type':('float','int',),},'abs':{'decrease':'basic','type':('float','int',),},'div':{'decrease':'basic','type':('float','int',),},'min':{'decrease':'basic','type':('float','int',),},'max':{'decrease':'basic','type':('float','int',),},'sqrt':{'decrease':'basic','type':('float',),},'sad':{'decrease':'basic','type':('int',),},'scale':{'decrease':'basic','type':('float',),},'sign':{'decrease':'basic','type':('int',),},'reccp':{'decrease':'basic','type':('float',),},}
# COMPUTE_EXTENDED: group name -> substrings matched against instruction
# names ('ins') and the data type recorded for a match ('type').
get_instruction_mix.COMPUTE_EXTENDED={'logical':{'ins':['or','and','not'],'type':'int',},'shift':{'ins':['shl','shr','sal','sar','shld','shrd','sll','srl','sra'],'type':'int',},'convert':{'ins':['cvt','vcvt'],'type':'int',},}
def get_instruction_mix_from_assembly(row):
 '''
 Build the instruction mix of a row from the assembly of its basic blocks.

 Returns ``(mix, status)``. ``mix`` maps (name, data type, width, memory
 kind) keys to counts per execution of the row, and is empty unless
 ``status`` is AssemblyInstrMixStatus.OK.
 '''
 res=defaultdict(lambda:0.0)
 assembly_available=False
 basic_blocks_executed=False
 for bb in get_basic_blocks(row):
  assembly_available=True
  bb_res=defaultdict(lambda:0.0)
  if bb.call_count:
   basic_blocks_executed=True
   # Seed the buckets from the 'static_*' columns of the block's static mix.
   for k,v in get_instruction_mix.COLUMNS:
    bb_res[k]=sum(get_sum_metrics(x,float,*('static_'+ins_category for ins_category in v))for x in bb.static_mix)
   # 'other' keeps what is left after the typed int/float buckets.
   bb_res[('basic','other',32,'')]-=bb_res[('basic','int',32,'')]+bb_res[('basic','float',32,'')]+bb_res[('basic','float',64,'')]
   bb_res[('basic','other',32,'global')]-=bb_res[('basic','int',32,'global')]+bb_res[('basic','float',32,'global')]+bb_res[('basic','float',64,'global')]
   bb_res[('mov','',32,'global')]-=bb_res[('mov','',64,'global')]
   for line in bb.assembly:
    if line['instruction_name']=='' or not line['operands']:
     continue
    ins_name=line['instruction_name'].lower()
    # Width is the widest operand; 0 is treated as 32 and the cap is 512.
    data_size=max(int(op['type_width'])for op in line['operands'])
    if data_size==0:
     data_size=32
    data_size=min(data_size,512)
    memory_type='global' if 'MEMORY' in line['instruction_type']else ''
    if 'mov' in ins_name:
     # Moves start in the 32-bit bucket; other widths are moved to their own.
     if data_size!=32:
      bb_res[('mov','',data_size,memory_type)]+=1
      bb_res[('mov','',32,memory_type)]-=1
     else:
      pass
    # Substring match; at most one hit is counted per group.
    for name,d in get_instruction_mix.COMPUTE_EXTENDED.items():
     for sub in d['ins']:
      if sub in ins_name:
       bb_res[(sub,d['type'],data_size,memory_type)]+=1
       break
    # Substring match; one count moves from the 'basic' bucket of the first
    # data type that still has at least one left.
    for ins,desc in get_instruction_mix.INSTRUCTIONS.items():
     if ins not in ins_name:
      continue
     for type_ in desc['type']:
      key=(desc['decrease'],type_,data_size,memory_type)
      if bb_res[key]>=1:
       bb_res[(ins,type_,data_size,memory_type)]+=1
       bb_res[key]-=1
       break
  # Negative buckets are clamped to 0, then weighted by the block's call
  # count. bb_res is empty for blocks without a call count.
  for k,v in bb_res.items():
   res[k]+=max(v,0)*bb.call_count
 # Normalise to counts per execution of the row.
 for k in res:
  res[k]/=get_ex_count(row)
 status=AssemblyInstrMixStatus.OK
 if not assembly_available:
  status=AssemblyInstrMixStatus.ASSEMBLY_UNAVAILABLE
 elif not basic_blocks_executed:
  status=AssemblyInstrMixStatus.NO_BASIC_BLOCKS_EXECUTED
 for ins_key,count in res.items():
  if count<0:
   res[ins_key]=0
   myprint('Fixed negative count {} for instruction {} of row {} (index={}) with assembly'.format(count,ins_key,row['function_call_sites_and_loops'],row['key_column']))
 # Parsed as: (dict(res) if OK else {}), status.
 return dict(res)if status==AssemblyInstrMixStatus.OK else{},status
def get_instruction_mix_from_row(row,ins_mix_type):
 '''
 Build the instruction mix of a row from its '<ins_mix_type>_*' columns.

 ins_mix_type is the column prefix ('self', 'dynamic' or 'static' in this
 module). Every prefix except 'static' is divided by the row's execution
 count. Returns a dict keyed by (name, data type, width, memory kind).
 '''
 res=defaultdict(lambda:0.0)
 multiplier=1.0/get_ex_count(row)if ins_mix_type!='static' else 1.0
 for k,v in get_instruction_mix.COLUMNS:
  res[k]=get_sum_metrics(row,float,*('{}_{}'.format(ins_mix_type,ins_category)for ins_category in v))*multiplier
 # 'other' keeps what is left after the typed int/float buckets.
 res[('basic','other',32,'')]-=res[('basic','int',32,'')]+res[('basic','float',32,'')]+res[('basic','float',64,'')]
 res[('basic','other',32,'global')]-=res[('basic','int',32,'global')]+res[('basic','float',32,'global')]+res[('basic','float',64,'global')]
 res[('mov','',32,'global')]-=res[('mov','',64,'global')]
 # 64-bit buckets are tried first when the row's data types mention '64'.
 if '64' in row['data_types']:
  data_sizes=(64,32)
 else:
  data_sizes=(32,)
 for op,desc in get_instruction_mix.INSTRUCTIONS.items():
  field_name='{}_{}'.format(ins_mix_type,op)
  try:
   count=float(row[field_name])*multiplier
  except(KeyError,ValueError):
   continue
  if count==0:
   continue
  # Move the operation's count out of the 'basic' buckets: each bucket is
  # drained in turn until the count is covered. Any count left after the
  # last bucket is dropped.
  for memory_type,type_,data_size in itertools.product(('global',''),desc['type'],data_sizes):
   decr_key=(desc['decrease'],type_,data_size,memory_type)
   inc_key=(op,type_,data_size,memory_type)
   if res[decr_key]<count:
    count-=res[decr_key]
    res[inc_key]+=res[decr_key]
    res[decr_key]=0
   else:
    res[inc_key]+=count
    res[decr_key]-=count
    break
 # Clamp negative buckets to 0 and report each fix.
 for ins_key,count in res.items():
  if count<0:
   res[ins_key]=0
   myprint('Fixed negative count {} for instruction {} of row {} (index={}) without assembly'.format(count,ins_key,row['function_call_sites_and_loops'],row['key_column']))
 return dict(res)
# Name patterns used with match_pattern() by the is_* predicates below.
# opencl_pattern is applied to 'mangled_name'; the others to
# 'function_call_sites_and_loops'.
omp_pattern=re.compile(r'(.*\$omp\$.*)|(.*OpenMP fork.*)|(.*\.omp_fn)|(__kmp_fork.*)')
opencl_pattern=re.compile(r'execute\<.*\>')
daal_pattern=re.compile(r'daal::threader_for\<.*\>')
tbb_pattern=re.compile(r'tbb::parallel_for\<.*\>')
tbb_dispatch_loop=re.compile(r'\[TBB Dispatch Loop\]')
def match_pattern(name, compiled_pattern):
    '''
    Return True when ``compiled_pattern`` is found anywhere in ``name``.

    A falsy name or pattern gives False.
    '''
    if name and compiled_pattern:
        return compiled_pattern.search(name) is not None
    return False
@cached_by_key_column
def is_openmp(row):
    '''
    Return True when the row has a truthy ``has_openmp`` attribute or its name matches ``omp_pattern``.
    '''
    if getattr(row, 'has_openmp', False):
        return True
    return match_pattern(row['function_call_sites_and_loops'], omp_pattern)
@cached_by_key_column
def is_opencl_kernel_call(row):
    '''
    Return True when the row's 'mangled_name' matches ``opencl_pattern``.
    '''
    return match_pattern(row['mangled_name'], opencl_pattern)
@cached_by_key_column
def is_daal_threader(row):
    '''
    Return True when the row's name matches ``daal_pattern``.
    '''
    return match_pattern(row['function_call_sites_and_loops'], daal_pattern)
@cached_by_key_column
def is_tbb_parallel_for(row):
    '''
    Return True when the row's name matches ``tbb_pattern``.
    '''
    return match_pattern(row['function_call_sites_and_loops'], tbb_pattern)
@cached_by_key_column
def is_tbb_dispatch_loop(row):
    '''
    Return True when the row's name matches the ``tbb_dispatch_loop`` pattern.
    '''
    return match_pattern(row['function_call_sites_and_loops'], tbb_dispatch_loop)
def find_inner_loops(topdown_row, *args):
    '''
    Return every loop on the shallowest tree level that contains a loop.

    Level 0 is ``topdown_row`` itself and level N+1 holds the children of
    level N. Returns an empty list when the subtree has no loop. The extra
    positional arguments are ignored.
    '''
    # Collect the whole subtree level by level first.
    levels = [[topdown_row]]
    while True:
        next_level = []
        for node in levels[-1]:
            if node.children:
                next_level += node.children
        if not next_level:
            break
        levels.append(next_level)
    # Then report the first level that has at least one loop.
    for level_rows in levels:
        loops = [node for node in level_rows if node.is_loop]
        if loops:
            return loops
    return []
def find_outer_user_loop(row, column_name):
    '''
    Walk from a row towards the root and return the first matching row as a one-item list.

    A row matches when it is not a function (``row.is_function``), has a
    truthy ``column_name`` value, and its 'module' matches none of
    ``find_outer_user_loop._exclude_modules_re``. The walk stops at a row
    named 'main' or when there is no parent left; an empty list is returned
    in that case.
    '''
    while row:
        if row['function_call_sites_and_loops'] == 'main':
            break
        if not row.is_function and row[column_name]:
            module = row['module']
            if not any(pattern.search(module) for pattern in find_outer_user_loop._exclude_modules_re):
                return [row]
        row = row.parent
    return []


# Modules skipped by the search. The raw string keeps '\.' a regex escape
# instead of an invalid string escape.
find_outer_user_loop._exclude_modules_re = [re.compile(r"libtbb.*\.so|tbb.*\.dll"), ]
