File size: 467 Bytes
65cdd86
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
import pandas as pd
import os
def normalize_counts(group):
    """Return a copy of *group* with a ``normalized_count`` column added.

    ``normalized_count`` is each row's ``count`` divided by the sum of
    ``count`` over all rows of *group*, so the values sum to 1 whenever the
    total is non-zero. The input frame is left unmodified.

    Args:
        group: DataFrame with a numeric ``count`` column.

    Returns:
        A new DataFrame with the same rows and columns plus
        ``normalized_count`` (overwritten if it already existed).
    """
    # ``assign`` builds a new frame instead of writing into the sub-frame
    # yielded by ``groupby``; in-place column assignment on such a sub-frame
    # can trigger pandas' SettingWithCopyWarning.
    total = group['count'].sum()
    return group.assign(normalized_count=group['count'] / total)


def split_counts_by_year(df, out_dir='subfield_counts'):
    """Write one ``<year>.csv`` per distinct ``year`` value into *out_dir*.

    Each file holds the rows of *df* for that year, with counts normalized
    within the year (see ``normalize_counts``), and is written without the
    index column.

    Args:
        df: DataFrame with ``year`` and ``count`` columns.
        out_dir: Target directory; created if it does not exist yet.
    """
    os.makedirs(out_dir, exist_ok=True)
    for year, group in df.groupby('year'):
        out_path = os.path.join(out_dir, f'{year}.csv')
        normalize_counts(group).to_csv(out_path, index=False)


def main():
    """Read the category counts CSV and emit the per-year subfield files."""
    df = pd.read_csv('./category-counts-by-year.csv')
    # Downstream files use the name "subfield" for the "category" column.
    df = df.rename(columns={'category': 'subfield'})
    split_counts_by_year(df, 'subfield_counts')


if __name__ == '__main__':
    main()