File size: 709 Bytes
e54728d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
import os
from pathlib import Path

import pandas as pd
from langchain.document_loaders import DataFrameLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter


def load_data() -> pd.DataFrame:
    """Load the descriptions stored in ``data/data.csv``.

    The file is read as a header-less CSV (its first line is treated as
    data, not as column names) and the column is named ``description``.
    The ``data`` folder is resolved relative to the current working
    directory.

    Returns:
        The DataFrame produced by ``pandas.read_csv`` with a single
        ``description`` column.
    """
    # Compose the path with pathlib only instead of mixing it with os.path.
    data_path = Path("data") / "data.csv"
    # read_csv already returns a DataFrame, so no re-wrapping is needed.
    return pd.read_csv(data_path, header=None, names=["description"])

def text_chunk():
    """Load the descriptions and split them into chunks.

    The DataFrame returned by ``load_data`` is wrapped in a
    ``DataFrameLoader`` that uses the ``description`` column as page
    content. The loaded content is then split with a
    ``RecursiveCharacterTextSplitter`` limited to 1000 units as measured
    by ``len``, with no overlap between consecutive chunks.

    Returns:
        Whatever ``DataFrameLoader.load_and_split`` returns for the loaded
        descriptions (presumably a list of LangChain documents -- confirm
        against the installed LangChain version).
    """
    frame = load_data()
    loader = DataFrameLoader(frame, page_content_column="description")
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=1000,
        chunk_overlap=0,
        length_function=len,
    )
    return loader.load_and_split(text_splitter=splitter)