import os
from tqdm import tqdm
import numpy as np
import sys

def caption_word_counts(lines, column=2):
    """Yield the number of space-separated words in each line's caption.

    Each line is stripped of trailing whitespace and split on tabs; the
    caption is the field at index ``column``.

    Args:
        lines: Iterable of text lines (for example an open file).
        column: Zero-based index of the tab-separated field that holds the
            caption. Defaults to 2 (the third field).

    Yields:
        The word count of each caption, where words are delimited by a
        single space character.
    """
    for line in lines:
        fields = line.rstrip().split("\t")
        # Blank lines and rows with too few fields have no caption; skip
        # them instead of failing with an IndexError.
        if len(fields) <= column:
            continue
        yield len(fields[column].split(" "))


if __name__ == "__main__":
    if len(sys.argv) < 2:
        sys.exit(f"usage: {sys.argv[0]} CAPTIONS_TSV")
    # Explicit encoding so decoding does not depend on the platform locale.
    with open(sys.argv[1], encoding="utf-8") as f:
        # Only the counts are kept; the caption text itself is not needed.
        lengths = list(caption_word_counts(tqdm(f)))
    if not lengths:
        # np.mean of an empty list would emit a RuntimeWarning and print nan.
        sys.exit(f"no captions found in {sys.argv[1]}")
    print(np.mean(lengths))