%%% derive average distributions for the time in use, time in storage and
%%% fates across the available unu keys to use as an extrapolation for the other
%%% keys that do not have the required data

%%% where possible, derive the averages for the unus in the same century group, i.e. 100s, 200s, etc; 
%%% if a group doesn't have any data, use average across the entire pool (lower confidence)

%%% score the data according to the accuracy of extrapolation 

% Extrapolation-quality scores: one row per unu key, one column per variable
% (column 1 = time in use, column 2 = time in storage, column 3 = fate).
%   5   - the required data is available for the unu itself
%   3.5 - the data is taken from other unus in the same century (similar products)
%   2   - the data is only available in other centuries => pool-wide average is used
% The matrix starts at zero and is filled in by the per-century loop further down.
scores_time_and_fate_unu_extrap = zeros(total_unu_keys_entire_pool, 3);

% ---- averages across the entire pool ----
% Each pass below sums the distribution of every unu whose availability flag
% (column 3, 4 or 7 of flags_all_variables) equals 1, then divides the sum by
% the corresponding externally supplied count of unus.

% time in use (flag column 3)
aux_vector_time_use = zeros(total_new_pdf_timesteps,1);

for k = 1:total_unu_keys_entire_pool

    if flags_all_variables(k,3) ~= 1
        continue % no time in use data for this unu
    end

    fff = allunudata(k).distributions.new_pdf_time_use;

    if size(fff,1)==1 && size(fff,2) > 1
        fff = fff.'; % stored as a row => turn it into a column before summing
    end

    aux_vector_time_use = aux_vector_time_use + fff;

end

pdf_time_use_average_entire_pool = aux_vector_time_use / total_unu_keys_time_use;

% time in storage (flag column 4)
aux_vector_time_storage = zeros(total_new_pdf_timesteps,1);

for k = 1:total_unu_keys_entire_pool

    if flags_all_variables(k,4) ~= 1
        continue % no time in storage data for this unu
    end

    fff = allunudata(k).distributions.new_pdf_time_storage;

    if size(fff,1)==1 && size(fff,2) > 1
        fff = fff.'; % stored as a row => turn it into a column before summing
    end

    aux_vector_time_storage = aux_vector_time_storage + fff;

end

pdf_time_storage_average_entire_pool = aux_vector_time_storage / total_unu_keys_time_storage;

% fates (flag column 7)
aux_vector_fates = zeros(total_cats_primary_fate,1);

for k = 1:total_unu_keys_entire_pool

    if flags_all_variables(k,7) ~= 1
        continue % no fates data for this unu
    end

    fff = allunudata(k).distributions.freq_fate;

    if size(fff,1)==1 && size(fff,2) > 1
        fff = fff.'; % stored as a row => turn it into a column before summing
    end

    aux_vector_fates = aux_vector_fates + fff;

end

freq_fate_average_entire_pool = aux_vector_fates / total_unu_keys_fate;

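% optional sanity checks on the pool-wide averages (uncomment to inspect):
% sum(pdf_time_use_average_entire_pool)
% sum(pdf_time_storage_average_entire_pool)
% sum(freq_fate_average_entire_pool)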

% now derive averages for each group of similar unu keys (based on unu
% centuries), subject to data availability, and score the accuracy of
% extrapolation

% Columns of flags_all_variables that hold the data-availability flag
% (1 = data present, 0 = data missing) of each extrapolated variable.
col_time_use = 3;
col_time_storage = 4;
col_fate = 7;

% One entry per extrapolated variable. The position v in these lists is also
% the column of scores_time_and_fate_unu_extrap that receives the score
% (1 = time in use, 2 = time in storage, 3 = fate).
extrap_flag_cols   = [col_time_use, col_time_storage, col_fate];
extrap_field_names = {'new_pdf_time_use', 'new_pdf_time_storage', 'freq_fate'};
extrap_pool_avgs   = {pdf_time_use_average_entire_pool, ...
                      pdf_time_storage_average_entire_pool, ...
                      freq_fate_average_entire_pool};
extrap_vec_lengths = [total_new_pdf_timesteps, total_new_pdf_timesteps, total_cats_primary_fate];

for c = 1:total_unu_centuries_entire_pool

    % Locate all unus in the current century. The indices are used directly
    % (not as the range first:last), so unus of one century do not have to sit
    % next to each other in unu_keys_entire_pool, and an empty century is
    % simply skipped by the loops below.
    ind_cent = find(100*floor(unu_keys_entire_pool/100) == unu_centuries_entire_pool(c));
    ind_cent = ind_cent(:).'; % force a row: "for k = ind_cent" iterates over columns

    for v = 1:numel(extrap_flag_cols) % time in use, time in storage, fate

        field_name = extrap_field_names{v};

        % availability flags of the unus in this century, as a row aligned with ind_cent
        flags_cent = flags_all_variables(ind_cent, extrap_flag_cols(v)).';

        if sum(flags_cent) == 0
            % no unu in this century has the required data
            % => use the average across the entire pool

            for k = ind_cent
                allunudata(k).distributions.(field_name) = extrap_pool_avgs{v};
            end

            scores_time_and_fate_unu_extrap(ind_cent, v) = 2; % lowest score

        elseif sum(flags_cent) == numel(ind_cent)
            % all unus in this century have the required data => nothing to extrapolate

            scores_time_and_fate_unu_extrap(ind_cent, v) = 5; % highest score

        else
            % only some unus in this century have the required data
            % => average across them and give that average to the others

            ind_with_data    = ind_cent(flags_cent == 1);
            ind_without_data = ind_cent(flags_cent == 0);

            aux_vector_sum = zeros(extrap_vec_lengths(v), 1);

            for k = ind_with_data

                fff = allunudata(k).distributions.(field_name);

                if size(fff,1)==1 && size(fff,2) > 1
                    fff = transpose(fff); % making sure we have a column
                end

                aux_vector_sum = aux_vector_sum + fff;

            end

            aux_average_vector = aux_vector_sum / numel(ind_with_data);

            % record the century-specific distribution for the unus that do
            % not have the data
            for k = ind_without_data
                allunudata(k).distributions.(field_name) = aux_average_vector;
            end

            scores_time_and_fate_unu_extrap(ind_with_data, v)    = 5;   % highest score
            scores_time_and_fate_unu_extrap(ind_without_data, v) = 3.5; % medium score

        end

    end % end of the loop over extrapolated variables

end % end of the loop over unu centuries