Spaces:
Running
Running
Create jobs.py
Browse files
jobs.py
ADDED
|
@@ -0,0 +1,238 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import datetime
|
| 2 |
+
import time
|
| 3 |
+
import re
|
| 4 |
+
|
| 5 |
+
import requests
|
| 6 |
+
from requests.adapters import HTTPAdapter
|
| 7 |
+
from requests.packages.urllib3.util.retry import Retry
|
| 8 |
+
from requests.exceptions import HTTPError
|
| 9 |
+
|
| 10 |
+
# CONSTANTS
# OAuth2 endpoint used to obtain the access token (client credentials grant).
ENDPOINT_ACCESS_TOKEN = "https://entreprise.francetravail.fr/connexion/oauth2/access_token"
# Base URL of the 'Offres d'emploi v2' API. Its value (trailing slash included) is kept
# unchanged for backward compatibility with any code that imports it.
OFFRES_DEMPLOI_V2_BASE = "https://api.francetravail.io/partenaire/offresdemploi/v2/"
# The derived endpoints strip the trailing slash of the base before joining, otherwise the
# resulting URLs contain a double slash ("v2//referentiel", "v2//offres/search").
REFERENTIEL_ENDPOINT = "{}/referentiel".format(OFFRES_DEMPLOI_V2_BASE.rstrip("/"))
SEARCH_ENDPOINT = "{}/offres/search".format(OFFRES_DEMPLOI_V2_BASE.rstrip("/"))
|
| 15 |
+
|
| 16 |
+
|
| 17 |
+
class Api:
    """
    Class to authenticate and use the methods of the 'API Offres emploi v2' from Emploi Store (Pole Emploi).

    The access token is requested lazily (on the first call that needs headers) and
    requested again once it has expired.
    """

    # Parses the 'Content-Range' response header, e.g. "offres 0-149/3200".
    # The "*" alternative ("offres */0") is tolerated so that a range without indexes
    # does not crash the parser -- presumably what the API sends for an empty result
    # set; TODO confirm against the API documentation.
    _CONTENT_RANGE_PATTERN = re.compile(
        r"offres (?:(?P<first_index>\d+)-(?P<last_index>\d+)|\*)/(?P<max_results>\d+)"
    )

    def __init__(self, client_id, client_secret, verbose=False, proxies=None):
        """
        Constructor to authenticate to 'Offres d'emploi v2'. Authentication is done using OAuth client credential grant. 'client_id' and 'client_secret' must be specified.

        Retry mechanisms are implemented in case the user does too many requests (code 429: too many requests) or just because the API might sometimes be unreliable (code 502: bad gateway).
        The retries are configured on the session used by 'referentiel' and 'search'.

        :param client_id: the client ID
        :type client_id: str
        :param client_secret: the client secret
        :type client_secret: str
        :param verbose: whether to add verbosity
        :type verbose: bool
        :param proxies: (optional) The proxies configuration
        :type proxies: dict with keys 'http' and/or 'https'
        :returns: None


        :Example 1:

        >>> from offres_demploi import Api
        >>> client = Api(client_id="<your_client_id>", client_secret="<your_client_secret>")

        :Example 2:
        >>> from offres_demploi import Api
        >>> proxy = "localhost:3128"
        >>> proxies = {"http": proxy, "https": proxy}
        >>> client_id = "<your_client_id>"
        >>> client_secret = "<your_client_secret>"
        >>> client = Api(client_id=client_id, client_secret=client_secret, proxies=proxies)
        """
        self.client_id = client_id
        self.client_secret = client_secret
        self.verbose = verbose
        self.proxies = proxies
        self.timeout = 60  # passed as 'timeout' to every HTTP call made by this class
        # No token yet: it is requested on the first call to get_headers()
        self.token = None

        retry = Retry(
            total=3,
            backoff_factor=1,
            status_forcelist=(
                502,
                429,
            ),  # 429 for too many requests and 502 for bad gateway
            # Rely on the backoff above instead of the server's Retry-After header
            respect_retry_after_header=False,
        )
        adapter = HTTPAdapter(max_retries=retry)
        session = requests.Session()
        session.mount("http://", adapter)
        session.mount("https://", adapter)
        self.session = session

    @staticmethod
    def _error_detail(response, key=None):
        """
        Extract a human readable detail from an error response without ever raising.

        :param response: the HTTP response of the failed request
        :param key: (optional) key of the JSON body to return; the whole body is used when
            the key is None or missing
        :rtype: str
        :returns: The requested part of the JSON body, or the raw text when the body is not JSON
        """
        try:
            payload = response.json()
        except ValueError:
            # Body is not JSON: fall back to the raw text so the original HTTP error
            # is not masked by a decoding error
            return response.text
        if key is not None and isinstance(payload, dict) and key in payload:
            return str(payload[key])
        return str(payload)

    @classmethod
    def _parse_content_range(cls, header):
        """
        Parse the value of the 'Content-Range' header.

        :param header: the header value (e.g. "offres 0-149/3200"), or None if absent
        :type header: str or None
        :rtype: dict
        :returns: A dictionary with the keys 'first_index', 'last_index' and 'max_results'.
            Values are strings taken from the header, or None for every part that is missing.
        """
        unknown = {"first_index": None, "last_index": None, "max_results": None}
        if not header:
            return unknown
        found = cls._CONTENT_RANGE_PATTERN.search(header)
        if found is None:
            return unknown
        return found.groupdict()

    def get_token(self):
        """
        Get the token as a class field (for subsequent use).

        :rtype: dict
        :returns: A token with fields from the API + expires_at custom field

        :raises HTTPError: Error when requesting the resource. For a 400 response, the
            body of the response is appended to the message.
        """
        data = {
            "grant_type": "client_credentials",
            "client_id": self.client_id,
            "client_secret": self.client_secret,
            "scope": "api_offresdemploiv2 o2dsoffre application_{}".format(
                self.client_id
            ),
        }
        headers = {"content-type": "application/x-www-form-urlencoded"}
        params = {"realm": "/partenaire"}
        # Taken before the request, so that 'expires_at' is never later than the real expiry
        current_time = datetime.datetime.today()
        r = requests.post(
            url=ENDPOINT_ACCESS_TOKEN,
            headers=headers,
            data=data,
            params=params,
            timeout=self.timeout,
            proxies=self.proxies,
        )
        try:
            r.raise_for_status()
        except HTTPError as error:
            if r.status_code != 400:
                raise
            complete_message = str(error) + "\n" + self._error_detail(r)
            # Keep the response attached and the original error chained for debugging
            raise HTTPError(complete_message, response=r) from error

        token = r.json()
        token["expires_at"] = current_time + datetime.timedelta(
            seconds=token["expires_in"]
        )
        self.token = token
        return token

    def is_expired(self):
        """
        Test if the token has expired (based on the 'expires_at' field)

        :rtype: boolean
        :returns: True if the token has expired or has never been requested, False otherwise
        """
        token = getattr(self, "token", None)
        if token is None:
            return True
        return datetime.datetime.today() >= token["expires_at"]

    def get_headers(self):
        """
        :rtype: dict
        :returns: The headers necessary to do requests. Will ask a new token if it has expired since or it has never been requested
        """
        # getattr: stay safe for instances whose 'token' attribute was never set
        if getattr(self, "token", None) is None:
            if self.verbose:
                print("Token has not been requested yet. Requesting token")
            self.get_token()
        elif self.is_expired():
            if self.verbose:
                print("Token is expired. Requesting new token")
            self.get_token()
        return {"Authorization": "Bearer {}".format(self.token["access_token"])}

    def referentiel(self, referentiel):
        """
        Get dictionary of 'referentiel'.
        'Referentiel' available: domaine, appellations (domaines professionnelles ROME), metiers, themes, continents,
        pays, regions, departements , communes , secteursActivites, naturesContrats, typesContrats, niveauxFormations,
        permis, langues

        Full list available at: https://www.emploi-store-dev.fr/portail-developpeur-cms/home/catalogue-des-api/documentation-des-api/api/api-offres-demploi-v2/referentiels.html

        :param referentiel: The 'referentiel' to look for
        :type referentiel: str
        :raises HTTPError: Error when requesting the resource
        :rtype: dict
        :returns: The 'referentiel' with the keys 'code' for the acronyme/abbreviation and 'libelle' for the full name.

        :Example:

        >>> client.referentiel("themes")

        """
        referentiel_endpoint = "{}/{}".format(REFERENTIEL_ENDPOINT, referentiel)

        r = self.session.get(
            url=referentiel_endpoint,
            headers=self.get_headers(),
            timeout=self.timeout,
            proxies=self.proxies,
        )
        r.raise_for_status()
        return r.json()

    def search(self, params=None, silent_http_errors=False):
        """
        Make job search based on parameters defined in:
        https://www.emploi-store-dev.fr/portail-developpeur-cms/home/catalogue-des-api/documentation-des-api/api/api-offres-demploi-v2/rechercher-par-criteres.html

        :param params: The parameters of the search request
        :type params: dict
        :param silent_http_errors: Silent HTTP errors if True, raise error otherwise. Default is False
        :type silent_http_errors: bool

        :raises HTTPError: Error when requesting the resource (only if 'silent_http_errors' is False)

        :rtype: dict
        :returns: A dictionary with three fields:
            - 'filtresPossibles', that display the aggregates output
            - 'resultats': that is the job offers
            - 'Content-Range': the current range index ('first_index' and 'last_index') and the maximum result index ('max_results')
            When the response has no body, 'filtresPossibles' and 'resultats' are empty lists.
            None is returned when an HTTP error occurred and 'silent_http_errors' is True.


        :Example:
        >>> params = {}
        >>> params.update({"MotsCles": "Ouvrier"})
        >>> params.update({"minCreationDate": "2020-01-01T00:00:00Z"})
        >>> client.search(params=params)
        """
        if self.verbose:
            print('Making request with params {}'.format(params))
        r = self.session.get(
            url=SEARCH_ENDPOINT,
            params=params,
            headers=self.get_headers(),
            timeout=self.timeout,
            proxies=self.proxies,
        )

        try:
            r.raise_for_status()
        except HTTPError as error:
            bad_request = r.status_code == 400
            complete_message = str(error)
            if bad_request:
                # A 400 response carries the reason of the rejection in its body
                complete_message += "\n" + self._error_detail(r, key="message")
            if silent_http_errors:
                print(complete_message)
                return None
            if bad_request:
                raise HTTPError(complete_message, response=r) from error
            raise

        if r.status_code == 204 or not r.content:
            # Nothing to decode: return the documented shape with empty values
            # instead of failing on r.json()
            out = {"filtresPossibles": [], "resultats": []}
        else:
            out = r.json()
        out.update(
            {"Content-Range": self._parse_content_range(r.headers.get("Content-Range"))}
        )
        return out
|