import os

import pandas as pd

# Split a combined counts table into one CSV per year, adding each row's
# share of that year's total count.
#
# The input is read from ./category-counts-by-year.csv and must provide
# `year` and `count` columns; a `category` column, if present, is written
# out under the name `subfield`.
df = pd.read_csv('./category-counts-by-year.csv')

# Rename category to subfield (rebinding instead of mutating in place).
df = df.rename(columns={'category': 'subfield'})

# Create the output directory; an already existing one is fine.
os.makedirs('subfield_counts', exist_ok=True)

# Split the DataFrame based on year and save to separate CSV files.
for year, group in df.groupby('year'):
    # assign() builds a new frame instead of writing a column into the
    # sub-frame handed out by groupby, which avoids pandas'
    # SettingWithCopyWarning for this kind of assignment.
    # NOTE: a year whose counts sum to zero produces NaN shares here.
    year_total = group['count'].sum()
    group = group.assign(normalized_count=group['count'] / year_total)
    group.to_csv(f'subfield_counts/{year}.csv', index=False)