Update src/ProcessOneSingleCampaign.py
Browse files- src/ProcessOneSingleCampaign.py +67 -14
src/ProcessOneSingleCampaign.py
CHANGED
|
@@ -553,12 +553,22 @@ class CampaignProcessor:
|
|
| 553 |
float: The transformed previous funding goal
|
| 554 |
"""
|
| 555 |
try:
|
| 556 |
-
|
| 557 |
-
|
| 558 |
-
|
| 559 |
-
|
| 560 |
-
|
| 561 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 562 |
|
| 563 |
except Exception as e:
|
| 564 |
print(f"Error processing previous funding goal for campaign {idx}: {str(e)}")
|
|
@@ -579,12 +589,22 @@ class CampaignProcessor:
|
|
| 579 |
float: The transformed previous pledged amount
|
| 580 |
"""
|
| 581 |
try:
|
| 582 |
-
|
| 583 |
-
|
| 584 |
-
|
| 585 |
-
|
| 586 |
-
|
| 587 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 588 |
|
| 589 |
except Exception as e:
|
| 590 |
print(f"Error processing pledge amount for campaign {idx}: {str(e)}")
|
|
@@ -595,6 +615,8 @@ class CampaignProcessor:
|
|
| 595 |
Calculate success rate of creator's previous campaigns.
|
| 596 |
|
| 597 |
Computes the ratio of successful previous projects to total previous projects.
|
|
|
|
|
|
|
| 598 |
|
| 599 |
Args:
|
| 600 |
campaign (Dict): Campaign data
|
|
@@ -604,9 +626,20 @@ class CampaignProcessor:
|
|
| 604 |
float: The previous success rate (0-1)
|
| 605 |
"""
|
| 606 |
try:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 607 |
previousProjects = float(campaign.get('previous_projects_count', 0))
|
| 608 |
previousSuccessfulProjects = float(campaign.get('previous_successful_projects', 0))
|
| 609 |
|
|
|
|
|
|
|
|
|
|
| 610 |
if previousProjects == 0.0:
|
| 611 |
return 0.0
|
| 612 |
else:
|
|
@@ -639,6 +672,9 @@ class CampaignProcessor:
|
|
| 639 |
"""
|
| 640 |
self._ensure_models_loaded()
|
| 641 |
|
|
|
|
|
|
|
|
|
|
| 642 |
# Generate embeddings for text fields
|
| 643 |
description_embedding, calculated_description_length = self.process_description_embedding(campaign, idx)
|
| 644 |
|
|
@@ -662,10 +698,27 @@ class CampaignProcessor:
|
|
| 662 |
result['previous_pledged'] = self.process_previous_pledged(campaign, idx)
|
| 663 |
|
| 664 |
# Calculate success rate based on previous projects
|
|
|
|
| 665 |
result['previous_success_rate'] = self.calculate_previous_sucess_rate(campaign, idx)
|
| 666 |
|
| 667 |
-
# Extract simple integer features
|
| 668 |
-
for field in ['image_count', 'video_count', 'campaign_duration'
|
| 669 |
result[field] = int(campaign.get(field, 0))
|
| 670 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 671 |
return result
|
|
|
|
| 553 |
float: The transformed previous funding goal
|
| 554 |
"""
|
| 555 |
try:
|
| 556 |
+
# Check if value is already provided in the expected field
|
| 557 |
+
if 'previous_funding_goal' in campaign:
|
| 558 |
+
previous_goal = float(campaign.get('previous_funding_goal', 0))
|
| 559 |
+
print(f"Using provided previous_funding_goal directly: {previous_goal}")
|
| 560 |
+
else:
|
| 561 |
+
print(f"No previous_funding_goal in input data, looking for alternative field names")
|
| 562 |
+
previous_goal = 0.0
|
| 563 |
+
|
| 564 |
+
# Apply logarithmic transformation
|
| 565 |
+
if previous_goal > 0:
|
| 566 |
+
# log10(1 + x): log1p rescaled to base 10, compressing large amounts while preserving their ordering
|
| 567 |
+
transformed_goal = np.log1p(previous_goal)/np.log(10)
|
| 568 |
+
print(f"Applied log transformation to previous_funding_goal: {previous_goal} → {transformed_goal}")
|
| 569 |
+
return transformed_goal
|
| 570 |
+
else:
|
| 571 |
+
return 0.0
|
| 572 |
|
| 573 |
except Exception as e:
|
| 574 |
print(f"Error processing previous funding goal for campaign {idx}: {str(e)}")
|
|
|
|
| 589 |
float: The transformed previous pledged amount
|
| 590 |
"""
|
| 591 |
try:
|
| 592 |
+
# Check if value is already provided in the expected field
|
| 593 |
+
if 'previous_pledged' in campaign:
|
| 594 |
+
pledged = float(campaign.get('previous_pledged', 0))
|
| 595 |
+
print(f"Using provided previous_pledged directly: {pledged}")
|
| 596 |
+
else:
|
| 597 |
+
print(f"No previous_pledged in input data, looking for alternative field names")
|
| 598 |
+
pledged = 0.0
|
| 599 |
+
|
| 600 |
+
# Apply logarithmic transformation
|
| 601 |
+
if pledged > 0:
|
| 602 |
+
# log10(1 + x): log1p rescaled to base 10, compressing large amounts while preserving their ordering
|
| 603 |
+
transformed_pledge = np.log1p(pledged)/np.log(10)
|
| 604 |
+
print(f"Applied log transformation to previous_pledged: {pledged} → {transformed_pledge}")
|
| 605 |
+
return transformed_pledge
|
| 606 |
+
else:
|
| 607 |
+
return 0.0
|
| 608 |
|
| 609 |
except Exception as e:
|
| 610 |
print(f"Error processing pledge amount for campaign {idx}: {str(e)}")
|
|
|
|
| 615 |
Calculate success rate of creator's previous campaigns.
|
| 616 |
|
| 617 |
Computes the ratio of successful previous projects to total previous projects.
|
| 618 |
+
Can use either direct 'previous_success_rate' or calculate from
|
| 619 |
+
'previous_successful_projects' and 'previous_projects_count'.
|
| 620 |
|
| 621 |
Args:
|
| 622 |
campaign (Dict): Campaign data
|
|
|
|
| 626 |
float: The previous success rate (0-1)
|
| 627 |
"""
|
| 628 |
try:
|
| 629 |
+
# First check if success rate is provided directly
|
| 630 |
+
if 'previous_success_rate' in campaign:
|
| 631 |
+
# Log the direct usage for debugging
|
| 632 |
+
rate = float(campaign.get('previous_success_rate', 0))
|
| 633 |
+
print(f"Using provided previous_success_rate directly: {rate}")
|
| 634 |
+
return rate
|
| 635 |
+
|
| 636 |
+
# Otherwise calculate from successful projects and total projects
|
| 637 |
previousProjects = float(campaign.get('previous_projects_count', 0))
|
| 638 |
previousSuccessfulProjects = float(campaign.get('previous_successful_projects', 0))
|
| 639 |
|
| 640 |
+
# Log the values used for calculation (for debugging)
|
| 641 |
+
print(f"Calculating success rate from: projects={previousProjects}, successful={previousSuccessfulProjects}")
|
| 642 |
+
|
| 643 |
if previousProjects == 0.0:
|
| 644 |
return 0.0
|
| 645 |
else:
|
|
|
|
| 672 |
"""
|
| 673 |
self._ensure_models_loaded()
|
| 674 |
|
| 675 |
+
# Log the incoming campaign data for debugging
|
| 676 |
+
print(f"Processing campaign {idx} with keys: {list(campaign.keys())}")
|
| 677 |
+
|
| 678 |
# Generate embeddings for text fields
|
| 679 |
description_embedding, calculated_description_length = self.process_description_embedding(campaign, idx)
|
| 680 |
|
|
|
|
| 698 |
result['previous_pledged'] = self.process_previous_pledged(campaign, idx)
|
| 699 |
|
| 700 |
# Calculate success rate based on previous projects
|
| 701 |
+
# Uses a directly provided 'previous_success_rate' when present; otherwise it is derived from the previous project counts
|
| 702 |
result['previous_success_rate'] = self.calculate_previous_sucess_rate(campaign, idx)
|
| 703 |
|
| 704 |
+
# Extract simple integer features, with specific handling for previous_projects_count
|
| 705 |
+
for field in ['image_count', 'video_count', 'campaign_duration']:
|
| 706 |
result[field] = int(campaign.get(field, 0))
|
| 707 |
|
| 708 |
+
# Special handling for previous_projects_count to ensure consistency
|
| 709 |
+
if 'previous_projects_count' in campaign:
|
| 710 |
+
# Use the value directly from input
|
| 711 |
+
result['previous_projects_count'] = int(campaign.get('previous_projects_count', 0))
|
| 712 |
+
print(f"Using provided previous_projects_count: {result['previous_projects_count']}")
|
| 713 |
+
else:
|
| 714 |
+
# Default to 0 if not provided
|
| 715 |
+
result['previous_projects_count'] = 0
|
| 716 |
+
|
| 717 |
+
# Log the final result for debugging
|
| 718 |
+
print(f"Processed campaign with previous metrics: " +
|
| 719 |
+
f"count={result.get('previous_projects_count')}, " +
|
| 720 |
+
f"rate={result.get('previous_success_rate')}, " +
|
| 721 |
+
f"pledged={result.get('previous_pledged')}, " +
|
| 722 |
+
f"goal={result.get('previous_funding_goal')}")
|
| 723 |
+
|
| 724 |
return result
|