Here is an example program that creates an RSS feed from an HTML page in Python.
Feel free to comment on the program below :-)
–Martin
#!/usr/bin/python
# coding: utf-8
"""
Outputs an RSS feed from an HTML page
Martin Monperrus
June 2015
"""
from lxml import etree
import feedgenerator
import requests
import os
# Fetch the HTML page that will be converted into a feed.
SOURCE_URL = 'https://www.galaxie.enseignementsup-recherche.gouv.fr/ensup/ListesPostesPublies/Emplois_publies_TrieParCorps.html'
# An explicit timeout is passed because requests otherwise waits for the
# server indefinitely, which would leave this script hanging.
response = requests.get(SOURCE_URL, timeout=30)
# Stop on an HTTP error status (4xx/5xx) rather than parsing an error page
# as if it were the listing.
response.raise_for_status()
doc = response.text
# Parse the page and select the rows of the table(s) nested inside a cell of
# an outer table; each selected row is a candidate feed item.
tree = etree.HTML(doc)
items = tree.xpath('//table/tr/td/table/tr')
def _first_or_empty(row, path):
    """Return the first result of the XPath `path` evaluated on `row`.

    The placeholder string 'empty' is returned when the expression matches
    nothing, so every feed item always gets a value for each field.
    """
    matches = row.xpath(path)
    return matches[0] if matches else 'empty'


# Create the feed that the table rows will be added to.
feed = feedgenerator.Rss201rev2Feed(
    title="Foo",
    link="https://foo/bar",
    description="Foo",
    language="fr",
)

# Add one feed item per table row. Cells are addressed by position:
# 1 = identifier, 2 = description, 3 = link (href of the anchor), 4 = title.
for row in items:
    feed.add_item(
        title=_first_or_empty(row, 'td[position()=4]/text()'),
        link=_first_or_empty(row, 'td[position()=3]/a/@href'),
        description=_first_or_empty(row, 'td[position()=2]/text()'),
        unique_id=_first_or_empty(row, 'td[position()=1]/text()'),
    )
# Emit the response the way a CGI script does: the Content-type header, a
# blank line ending the headers, then the serialized feed.
# Each print takes a single parenthesised argument so the lines behave the
# same under Python 2 (print statement) and Python 3 (print function).
# The charset label is spelled "utf-8", the registered name of the encoding
# passed to writeString below.
print("Content-type: application/rss+xml; charset=utf-8")
print("")
print(feed.writeString('utf-8'))