librus_apix.schedule

This module provides functions for retrieving schedule information from the Librus site, parsing it, and formatting it into a structured representation.

Classes: - Event: Represents an event in the schedule with various attributes like title, subject, day, etc.

Functions: - schedule_detail: Fetches detailed schedule information for a specific prefix and detail URL suffix. - get_schedule: Fetches the schedule for a specific month and year.

Usage:

from librus_apix.client import new_client

# Create a new client instance
client = new_client()
client.get_token(username, password)


# Fetch the schedule for a specific month and year
month = "01"
year = "2024"
include_empty = True
monthly_schedule = get_schedule(client, month, year, include_empty)

# Fetch detailed schedule information
day_one = monthly_schedule[1].href
prefix, suffix = day_one.split("/")
detailed_schedule = schedule_detail(client, prefix, suffix)
  1"""
  2This module provides functions for retrieving schedule information from the Librus site, parsing it, and formatting it into a structured representation.
  3
  4Classes:
  5    - Event: Represents an event in the schedule with various attributes like title, subject, day, etc.
  6
  7Functions:
  8    - schedule_detail: Fetches detailed schedule information for a specific prefix and detail URL suffix.
  9    - get_schedule: Fetches the schedule for a specific month and year.
 10
 11Usage:
 12    ```python
 13    from librus_apix.client import new_client
 14
 15    # Create a new client instance
 16    client = new_client()
 17    client.get_token(username, password)
 18
 19
 20    # Fetch the schedule for a specific month and year
 21    month = "01"
 22    year = "2024"
 23    include_empty = True
 24    monthly_schedule = get_schedule(client, month, year, include_empty)
 25
 26    # Fetch detailed schedule information
 27    day_one = monthly_schedule[1].href
 28    prefix, suffix = day_one.split("/")
 29    detailed_schedule = schedule_detail(client, prefix, suffix)
 30    ```
 31"""
 32
 33import re
 34from collections import defaultdict
 35from dataclasses import dataclass
 36from typing import DefaultDict, Dict, List, Union
 37
 38from bs4 import BeautifulSoup, NavigableString, Tag
 39
 40from librus_apix.client import Client
 41from librus_apix.exceptions import ParseError
 42from librus_apix.helpers import no_access_check
 43
 44
 45@dataclass
 46class Event:
 47    """
 48    Represents an event in the schedule.
 49
 50    Attributes:
 51        title (str): The title of the event.
 52        subject (str): The subject of the event.
 53        data (dict): Additional data associated with the event.
 54        day (str): The day on which the event occurs.
 55        number (Union[int, str]): The number associated with the event.
 56        hour (str): The hour at which the event occurs.
 57        href (str): 'prefix'/'suffix' joined with a slash (this should be reworked...).
 58    """
 59
 60    title: str
 61    subject: str
 62    data: dict
 63    day: str
 64    number: Union[int, str]
 65    hour: str
 66    href: str
 67
 68
 69def schedule_detail(client: Client, prefix: str, detail_url: str) -> Dict[str, str]:
 70    """
 71    Fetches the detailed schedule information for a specific prefix and detail URL suffix.
 72
 73    Args:
 74        client (Client): The client object for making HTTP requests.
 75        prefix (str): The prefix of the schedule URL.
 76        detail_url (str): The detail URL of the schedule.
 77
 78    Returns:
 79        Dict[str, str]: A dictionary containing schedule details.
 80    """
 81    schedule = {}
 82    div = no_access_check(
 83        BeautifulSoup(
 84            client.get(client.SCHEDULE_URL + prefix + "/" + detail_url).text, "lxml"
 85        )
 86    ).find("div", attrs={"class": "container-background"})
 87
 88    if div is None or isinstance(div, NavigableString):
 89        raise ParseError("Error in parsing schedule details.")
 90    tr: List[Tag] = div.find_all("tr", attrs={"class": ["line0", "line1"]})
 91    for s in tr:
 92        th = s.find("th")
 93        td = s.find("td")
 94        if td is None or th is None:
 95            continue
 96        schedule[th.text.strip()] = td.text.strip()
 97    return schedule
 98
 99
100def _parse_title_into_pairs(title: str) -> Dict[str, str]:
101    additional_data = {}
102    pairs = [pair.split(":", 1) for pair in title.split("<br />")]
103    for pair in pairs:
104        if len(pair) != 2:
105            additional_data[pair[0].strip()] = "unknown"
106            continue
107        key, val = pair
108        additional_data[key.strip()] = val.strip()
109
110    return additional_data
111
112
def get_schedule(
    client: Client, month: str, year: str, include_empty: bool = False
) -> DefaultDict[int, List[Event]]:
    """
    Fetches the schedule for a specific month and year.

    Args:
        client (Client): The client object for making HTTP requests.
        month (str): The month for which the schedule is requested.
        year (str): The year for which the schedule is requested.
        include_empty (bool, optional): Flag to include empty schedules. Defaults to False.

    Returns:
        DefaultDict[int, List[Event]]: A dictionary containing the schedule for each day of the month.

    Raises:
        ParseError: If the calendar days, a day number, or an event row cannot be parsed.
    """
    schedule: DefaultDict[int, List[Event]] = defaultdict(list)
    soup = no_access_check(
        BeautifulSoup(
            client.post(client.SCHEDULE_URL, data={"rok": year, "miesiac": month}).text,
            "lxml",
        )
    )
    days = soup.find_all("div", attrs={"class": "kalendarz-dzien"})
    if len(days) < 1:
        raise ParseError("Error in parsing days of the schedule.")
    for day in days:
        try:
            d = int(day.find("div", attrs={"class": "kalendarz-numer-dnia"}).text)
        # Narrowed from a bare `except:`: .find() returning None raises
        # AttributeError; non-numeric text raises ValueError.
        except (AttributeError, ValueError) as e:
            raise ParseError("Error while parsing day number") from e
        if include_empty:
            schedule[d] = []
        rows: List[Tag] = day.find_all("tr")
        # NOTE: the loop variable used to be named `event`, shadowing the
        # Event instance built at the bottom of the loop.
        for row in rows:
            td = row.find("td")
            if td is None or isinstance(td, NavigableString):
                continue
            title = td.attrs.get("title", "Nauczyciel: unknown<br />Opis: unknown")
            additional_data = _parse_title_into_pairs(title)
            subject = "unspecified"
            span = td.find("span")
            if span is not None:
                subject = span.text
                # Remove the subject node so it doesn't leak into the cell text.
                span.extract()

            # Replace <br> tags with a marker so the cell text can be split
            # back into its visual lines.
            delimiter = "###"
            for line in td.select("br"):
                line.replaceWith(delimiter)
            data = (
                td.text.replace("\xa0", " ")
                .replace(", ", "")
                .replace("\n", "")
                .strip()
                .split(delimiter)
            )
            if subject == "unspecified":
                subject = data[0]
            title = data[1] if len(data) >= 2 else data[0]

            number: Union[int, str] = "unknown"
            hour = "unknown"
            number_td = row.find("td")
            if number_td is None or isinstance(number_td, NavigableString):
                raise ParseError("Error while parsing td_number schedule.")
            try:
                # Lesson number rendered as ": N" / ": NN". The original
                # pattern used "[0-99]", which is just "[0-9]" in disguise;
                # "[0-9]{1,2}" matches the same strings and says what it means.
                number = int(
                    re.findall(r": ?[0-9]{1,2}", number_td.text)[0].replace(": ", "")
                )
            except ValueError:
                # Not a lesson number; fall back to an HH:MM-style hour.
                # Guard the lookup: an empty match list previously raised an
                # uncaught IndexError here.
                hour_match = re.findall(r" ?[0-2]?[0-9]:?[0-5]?[0-9]", number_td.text)
                if hour_match:
                    hour = hour_match[0]
            except IndexError:
                pass
            onclick = number_td.attrs.get("onclick", "'")
            href_parts = onclick.split("'")[1].split("/")
            # Keep everything after the first two path segments of the
            # onclick URL; empty when the URL is too short.
            href = "/".join(href_parts[2:]) if len(href_parts) >= 2 else ""

            schedule[d].append(
                Event(title, subject, additional_data, str(d), number, hour, href)
            )
    return schedule
198
199
@dataclass
class RecentEvent:
    """An entry from the "recently added" schedule feed.

    These differ slightly from regular schedule events: .data holds the raw
    text (event name, date from/to, duration). It might be split into
    separate fields once real HTML samples are available.
    """

    date_added: str  # timestamp when the event was added
    type: str  # event kind as displayed by Librus
    data: str  # raw, sanitized description text
212
213def _sanitize_data(data: str) -> str:
214    return (
215        data.replace("&nbsp;", " ")
216        .replace("<br/>", "<br>")
217        .replace("<br>", "\n")
218        .strip()
219    )
220
221
def get_recently_added_schedule(client: Client) -> List[RecentEvent]:
    """
    Fetch events recently added to the schedule.

    Events can be viewed only once here, any subsequent call won't have same events
    Made blindly based on a screenshot, still untested...

    Args:
        client (Client): The client object for making HTTP requests.

    Returns:
        List[RecentEvent]: One entry per data row of the recent-schedule table.

    Raises:
        ParseError: If the container-background div cannot be located.
    """
    events = []
    soup = no_access_check(
        BeautifulSoup(
            client.get(client.RECENT_SCHEDULE_URL).text,
            "lxml",
        )
    )
    bg = soup.select_one("div.container-background")
    if bg is None:
        raise ParseError("Unable to locate recent schedule container-background")
    table = soup.select_one("table")
    if table is None:
        return []
    rows = table.select("tr")
    for row in rows:
        tds = row.select("td")
        if len(tds) != 4:
            continue
        _, date_added, _type, data = tds
        data = _sanitize_data(data.text)
        # Skip the header row if it is rendered with <td> cells.
        # Bug fix: `"..." in tag` on a bs4 Tag tests membership among the
        # tag's *children*, not a substring of its text, so the original
        # check almost never matched — inspect the cells' text instead.
        if "czas dodania" in date_added.text and "rodzaj zdarzenia" in _type.text:
            continue
        event = RecentEvent(date_added.text.strip(), _type.text.strip(), data)
        events.append(event)
    return events
@dataclass
class Event:
46@dataclass
47class Event:
48    """
49    Represents an event in the schedule.
50
51    Attributes:
52        title (str): The title of the event.
53        subject (str): The subject of the event.
54        data (dict): Additional data associated with the event.
55        day (str): The day on which the event occurs.
56        number (Union[int, str]): The number associated with the event.
57        hour (str): The hour at which the event occurs.
58        href (str): 'prefix'/'suffix' joined with a slash (this should be reworked...).
59    """
60
61    title: str
62    subject: str
63    data: dict
64    day: str
65    number: Union[int, str]
66    hour: str
67    href: str

Represents an event in the schedule.

Attributes: title (str): The title of the event. subject (str): The subject of the event. data (dict): Additional data associated with the event. day (str): The day on which the event occurs. number (Union[int, str]): The number associated with the event. hour (str): The hour at which the event occurs. href (str): 'prefix'/'suffix' joined with a slash (this should be reworked...).

Event( title: str, subject: str, data: dict, day: str, number: Union[int, str], hour: str, href: str)
title: str
subject: str
data: dict
day: str
number: Union[int, str]
hour: str
href: str
def schedule_detail( client: librus_apix.client.Client, prefix: str, detail_url: str) -> Dict[str, str]:
70def schedule_detail(client: Client, prefix: str, detail_url: str) -> Dict[str, str]:
71    """
72    Fetches the detailed schedule information for a specific prefix and detail URL suffix.
73
74    Args:
75        client (Client): The client object for making HTTP requests.
76        prefix (str): The prefix of the schedule URL.
77        detail_url (str): The detail URL of the schedule.
78
79    Returns:
80        Dict[str, str]: A dictionary containing schedule details.
81    """
82    schedule = {}
83    div = no_access_check(
84        BeautifulSoup(
85            client.get(client.SCHEDULE_URL + prefix + "/" + detail_url).text, "lxml"
86        )
87    ).find("div", attrs={"class": "container-background"})
88
89    if div is None or isinstance(div, NavigableString):
90        raise ParseError("Error in parsing schedule details.")
91    tr: List[Tag] = div.find_all("tr", attrs={"class": ["line0", "line1"]})
92    for s in tr:
93        th = s.find("th")
94        td = s.find("td")
95        if td is None or th is None:
96            continue
97        schedule[th.text.strip()] = td.text.strip()
98    return schedule

Fetches the detailed schedule information for a specific prefix and detail URL suffix.

Args: client (Client): The client object for making HTTP requests. prefix (str): The prefix of the schedule URL. detail_url (str): The detail URL of the schedule.

Returns: Dict[str, str]: A dictionary containing schedule details.

def get_schedule( client: librus_apix.client.Client, month: str, year: str, include_empty: bool = False) -> DefaultDict[int, List[Event]]:
114def get_schedule(
115    client: Client, month: str, year: str, include_empty: bool = False
116) -> DefaultDict[int, List[Event]]:
117    """
118    Fetches the schedule for a specific month and year.
119
120    Args:
121        client (Client): The client object for making HTTP requests.
122        month (str): The month for which the schedule is requested.
123        year (str): The year for which the schedule is requested.
124        include_empty (bool, optional): Flag to include empty schedules. Defaults to False.
125
126    Returns:
127        DefaultDict[int, List[Event]]: A dictionary containing the schedule for each day of the month.
128    """
129    schedule = defaultdict(list)
130    soup = no_access_check(
131        BeautifulSoup(
132            client.post(client.SCHEDULE_URL, data={"rok": year, "miesiac": month}).text,
133            "lxml",
134        )
135    )
136    days = soup.find_all("div", attrs={"class": "kalendarz-dzien"})
137    if len(days) < 1:
138        raise ParseError("Error in parsing days of the schedule.")
139    for day in days:
140        try:
141            d = int(day.find("div", attrs={"class": "kalendarz-numer-dnia"}).text)
142        except:
143            raise ParseError("Error while parsing day number")
144        if include_empty == True:
145            schedule[d] = []
146        tr: List[Tag] = day.find_all("tr")
147        for event in tr:
148            td = event.find("td")
149            if td is None or isinstance(td, NavigableString):
150                continue
151            title = td.attrs.get("title", "Nauczyciel: unknown<br />Opis: unknown")
152            additional_data = _parse_title_into_pairs(title)
153            subject = "unspecified"
154            span = td.find("span")
155            if span is not None:
156                subject = span.text
157                span.extract()
158
159            delimeter = "###"
160            for line in td.select("br"):
161                line.replaceWith(delimeter)
162            data = (
163                td.text.replace("\xa0", " ")
164                .replace(", ", "")
165                .replace("\n", "")
166                .strip()
167                .split(delimeter)
168            )
169            if subject == "unspecified":
170                subject = data[0]
171            if len(data) >= 2:
172                title = data[1]
173            else:
174                title = data[0]
175
176            number = "unknown"
177            hour = "unknown"
178            number_td = event.find("td")
179            if number_td is None or isinstance(number_td, NavigableString):
180                raise ParseError("Error while parsing td_number schedule.")
181            try:
182                number = int(
183                    re.findall(r": ?[0-99]?[0-99]", number_td.text)[0].replace(": ", "")
184                )
185            except ValueError:
186                hour = re.findall(r" ?[0-2]?[0-9]:?[0-5]?[0-9]", number_td.text)[0]
187            except IndexError:
188                pass
189            onclick = number_td.attrs.get("onclick", "'")
190            href = onclick.split("'")[1].split("/")
191            if len(href) >= 2:
192                href = "/".join(href[2:])
193            else:
194                href = ""
195
196            event = Event(title, subject, additional_data, str(d), number, hour, href)
197            schedule[d].append(event)
198    return schedule

Fetches the schedule for a specific month and year.

Args: client (Client): The client object for making HTTP requests. month (str): The month for which the schedule is requested. year (str): The year for which the schedule is requested. include_empty (bool, optional): Flag to include empty schedules. Defaults to False.

Returns: DefaultDict[int, List[Event]]: A dictionary containing the schedule for each day of the month.

@dataclass
class RecentEvent:
201@dataclass
202class RecentEvent:
203    """
204    The events inside recent_schedule differ a little bit
205    the .data should contain event name, date from to and duration
206    I might be able to extract into separate values if I get html
207    """
208
209    date_added: str
210    type: str
211    data: str

The events inside recent_schedule differ a little bit: the .data field should contain the event name, the date from/to, and the duration. These might be extracted into separate values once the HTML is available.

RecentEvent(date_added: str, type: str, data: str)
date_added: str
type: str
data: str
def get_recently_added_schedule( client: librus_apix.client.Client) -> List[RecentEvent]:
223def get_recently_added_schedule(client: Client) -> List[RecentEvent]:
224    """
225    Events can be viewed only once here, any subsequent call won't have same events
226    Made blindly based on a screenshot, still untested...
227    """
228    events = []
229    soup = no_access_check(
230        BeautifulSoup(
231            client.get(client.RECENT_SCHEDULE_URL).text,
232            "lxml",
233        )
234    )
235    bg = soup.select_one("div.container-background")
236    if bg is None:
237        raise ParseError("Unable to locate recent schedule container-background")
238    table = soup.select_one("table")
239    if table is None:
240        return []
241    rows = table.select("tr")
242    for row in rows:
243        tds = row.select("td")
244        if len(tds) != 4:
245            continue
246        _, date_added, _type, data = tds
247        data = _sanitize_data(data.text)
248        # unsure about that so we'll check
249        if "czas dodania" in date_added and "rodzaj zdarzenia" in _type:
250            continue
251        event = RecentEvent(date_added.text.strip(), _type.text.strip(), data)
252        events.append(event)
253    return events

Events can be viewed only once here; any subsequent call won't have the same events. Made blindly based on a screenshot, still untested...