ojs595 committed on
Commit 22b2ce1 · verified · 1 Parent(s): 2c0dc53

Upload 2 files

Files changed (2)
  1. app.py +149 -0
  2. requirements.txt +5 -0
app.py ADDED
@@ -0,0 +1,149 @@
+ import torch
+ from transformers import AutoTokenizer, AutoModelForSequenceClassification
+ import gradio as gr
+ import pandas as pd
+ import io
+ from torch.utils.data import DataLoader, Dataset
+ from torch.optim import AdamW
+ from sklearn.model_selection import train_test_split
+
+ # Load the model and tokenizer (beomi/kcbert-base is a Korean BERT checkpoint,
+ # so the classifier expects Korean input text)
+ MODEL_NAME = "beomi/kcbert-base"
+ tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
+ model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME, num_labels=2)
+
+ # Dataset class definition
+ class CustomDataset(Dataset):
+     def __init__(self, dataframe, tokenizer, max_len=128):
+         self.tokenizer = tokenizer
+         self.data = dataframe
+         self.max_len = max_len
+
+     def __len__(self):
+         return len(self.data)
+
+     def __getitem__(self, index):
+         item = self.data.iloc[index]
+         description = str(item['description'])
+         label = item['label']
+
+         encoding = self.tokenizer.encode_plus(
+             description,
+             add_special_tokens=True,
+             max_length=self.max_len,
+             return_token_type_ids=False,
+             padding='max_length',
+             truncation=True,
+             return_attention_mask=True,
+             return_tensors='pt',
+         )
+
+         return {
+             'input_ids': encoding['input_ids'].flatten(),
+             'attention_mask': encoding['attention_mask'].flatten(),
+             'labels': torch.tensor(label, dtype=torch.long)
+         }
+
+ # Prepare the training data and fine-tune the model
+ def train_model():
+     csv_data = """description,gender
+ "그는 축구를 정말 좋아하고, 근육질의 몸매를 가졌다.",남자
+ "그녀는 긴 머리를 가졌고, 분홍색 원피스를 입었다.",여자
+ "짧은 머리에 정장을 입은 그는 회의에 참석했다.",남자
+ "아름다운 목소리로 노래하는 그녀는 가수다.",여자
+ "그의 취미는 자동차 정비와 컴퓨터 게임이다.",남자
+ "그녀는 섬세한 손길로 아기 인형을 만들었다.",여자
+ "군대에서 막 제대한 그는 씩씩해 보였다.",남자
+ "그녀는 친구들과 수다 떠는 것을 좋아한다.",여자
+ "강력한 리더십으로 팀을 이끄는 모습이 인상적이었다.",남자
+ "자신이 직접 만든 쿠키를 주변에 나누어주곤 한다.",여자
+ "안일찬",여자
+ """
+
+     data = pd.read_csv(io.StringIO(csv_data))
+     # Map the Korean gender labels to integers: '남자' (male) -> 0, '여자' (female) -> 1
+     data['label'] = data['gender'].apply(lambda x: 0 if x == '남자' else 1)
+     train_data, _ = train_test_split(data, test_size=0.2, random_state=42)
+
+     train_dataset = CustomDataset(train_data, tokenizer)
+     train_loader = DataLoader(train_dataset, batch_size=2, shuffle=True)
+
+     device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+     model.to(device)
+
+     optimizer = AdamW(model.parameters(), lr=5e-5)
+
+     print("Starting model training...")
+     model.train()
+     for epoch in range(3):
+         for batch in train_loader:
+             optimizer.zero_grad()
+             input_ids = batch['input_ids'].to(device)
+             attention_mask = batch['attention_mask'].to(device)
+             labels = batch['labels'].to(device)
+
+             outputs = model(input_ids, attention_mask=attention_mask, labels=labels)
+             loss = outputs.loss
+             loss.backward()
+             optimizer.step()
+         print(f"Epoch {epoch + 1} complete")
+
+     print("Model training complete!")
+
+ # Prediction function
+ def predict_gender(text):
+     if not text.strip():
+         return "Please enter some text."
+
+     device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+     model.eval()
+
+     encoding = tokenizer.encode_plus(
+         text,
+         add_special_tokens=True,
+         max_length=128,
+         return_token_type_ids=False,
+         padding='max_length',
+         truncation=True,
+         return_attention_mask=True,
+         return_tensors='pt',
+     )
+
+     input_ids = encoding['input_ids'].to(device)
+     attention_mask = encoding['attention_mask'].to(device)
+
+     with torch.no_grad():
+         outputs = model(input_ids, attention_mask=attention_mask)
+         # Softmax turns the two logits into class probabilities; argmax picks
+         # the higher-scoring class, whose probability is reported as the confidence
+         probabilities = torch.nn.functional.softmax(outputs.logits, dim=1)
+         prediction = torch.argmax(outputs.logits, dim=1).flatten().item()
+         confidence = probabilities[0][prediction].item()
+
+     gender = "Male" if prediction == 0 else "Female"
+     return f"Predicted gender: {gender} (confidence: {confidence:.2%})"
+
+ # Train the model at app startup
+ print("Initializing app...")
+ train_model()
+
+ # Build the Gradio interface
+ iface = gr.Interface(
+     fn=predict_gender,
+     inputs=gr.Textbox(
+         lines=3,
+         placeholder="Enter the text to predict gender from.\ne.g. '그는 축구를 좋아하고 근육질이다.'",
+         label="Input text"
+     ),
+     outputs=gr.Textbox(label="Prediction result"),
+     title="🤖 AI Gender Predictor",
+     description="Predicts gender based on the input text.",
+     examples=[
+         ["그는 축구를 정말 좋아하고, 근육질의 몸매를 가졌다."],
+         ["그녀는 긴 머리를 가졌고, 분홍색 원피스를 입었다."],
+         ["짧은 머리에 정장을 입은 그는 회의에 참석했다."],
+         ["아름다운 목소리로 노래하는 그녀는 가수다."]
+     ],
+     theme=gr.themes.Soft()
+ )
+
+ # Run the app
+ if __name__ == "__main__":
+     iface.launch()
requirements.txt ADDED
@@ -0,0 +1,5 @@
+ torch
+ transformers
+ gradio
+ pandas
+ scikit-learn