Skip to content

Commit 25e1a32

Browse files
committed
Parser: BA-Anträge
1 parent 53c83b4 commit 25e1a32

20 files changed

Lines changed: 781 additions & 224 deletions

protected/RISParser/StadtratsantragData.php renamed to protected/RISParser/AntragData.php

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,11 +2,13 @@
22

33
declare(strict_types=1);
44

5-
class StadtratsantragData
5+
class AntragData
66
{
77
public int $id;
88
public string $antragsnummer;
99
public string $status;
10+
public ?int $baNr = null;
11+
public ?int $baId = null;
1012
public string $title;
1113
public string $wahlperiode;
1214
public ?\DateTime $gestelltAm;
@@ -39,7 +41,7 @@ public static function parseFromHtml(string $html): ?self
3941
$entry = new self();
4042
$entry->title = $match['title'];
4143

42-
if (!preg_match('/<h1[^>]*>.*StR-(Antrag|Anfrage) (?<nummer>[^<]*) <span[^>]*><span>\((?<status>[^)]*)\)<\/span>/siuU', $html, $match)) {
44+
if (!preg_match('/<h1[^>]*>.*(StR|BA)-(Antrag|Anfrage) (?<nummer>[^<]*) <span[^>]*><span>\((?<status>[^)]*)\)<\/span>/siuU', $html, $match)) {
4345
throw new ParsingException('Not found: antragsnummer / status');
4446
}
4547
$entry->antragsnummer = str_replace(' ', '', $match['nummer']);
@@ -55,6 +57,11 @@ public static function parseFromHtml(string $html): ?self
5557
}
5658
$entry->wahlperiode = $match['wahlperiode'];
5759

60+
if (preg_match('/Bezirksausschuss<\/span>:<\/div>\s*<div[^>]*>\s*<a[^>]*gremium\/detail\/(?<baId>\d+)[^\d][^>]*>(?<baNr>\d+ -)/siuU', $html, $match)) {
61+
$entry->baId = intval($match['baId']);
62+
$entry->baNr = intval($match['baNr']);
63+
}
64+
5865
if (preg_match('/<div[^>]*>Gestellt am:<\/div>\s*<div[^>]*>(?<date>\d+\.\d+\.\d+)<\/div>/siuU', $html, $match)) {
5966
$entry->gestelltAm = (\DateTime::createFromFormat('d.m.Y', $match['date']))->setTime(0, 0, 0);
6067
} else {

protected/RISParser/BAAntragParser.php

Lines changed: 74 additions & 163 deletions
Original file line numberDiff line numberDiff line change
@@ -2,135 +2,51 @@
22

33
class BAAntragParser extends RISParser
44
{
5-
private static $MAX_OFFSET = 25000;
6-
private static $MAX_OFFSET_UPDATE = 200;
5+
private BrowserBasedDowloader $browserBasedDowloader;
6+
private CurlBasedDownloader $curlBasedDownloader;
77

8-
public function parse(int $antrag_id): ?Antrag
8+
public function __construct(?BrowserBasedDowloader $browserBasedDowloader = null, ?CurlBasedDownloader $curlBasedDownloader = null)
99
{
10-
$antrag_id = IntVal($antrag_id);
11-
12-
if (SITE_CALL_MODE != "cron") echo "- Antrag $antrag_id\n";
13-
14-
if ($antrag_id == 0) {
15-
RISTools::report_ris_parser_error("Fehler BAAntragParser", "Antrag-ID 0\n" . print_r(debug_backtrace(), true));
16-
return null;
17-
}
18-
19-
$html_details = RISTools::load_file(RIS_BA_BASE_URL . "ba_antraege_details.jsp?Id=$antrag_id&selTyp=");
20-
$html_dokumente = RISTools::load_file(RIS_BA_BASE_URL . "ba_antraege_dokumente.jsp?Id=$antrag_id&selTyp=BA-Antrag");
21-
//$html_ergebnisse = load_file(RIS_BA_BASE_URL . "ris_antrag_ergebnisse.jsp?risid=" . $antrag_id);
22-
23-
$daten = new Antrag();
24-
$daten->id = $antrag_id;
25-
$daten->datum_letzte_aenderung = new CDbExpression('NOW()');
26-
$daten->gestellt_von = "";
27-
$daten->referat = "";
28-
$daten->referent = "";
29-
$daten->antrag_typ = "";
30-
$daten->kurzinfo = "";
31-
$daten->bearbeitung = "";
32-
$daten->initiatorInnen = "";
33-
34-
$dokumente = [];
35-
//$ergebnisse = array();
36-
37-
$dat_details = explode("<!-- bereichsbild, bereichsheadline, allgemeiner text -->", $html_details);
38-
if (!isset($dat_details[1])) {
39-
throw new Exception("Fehlerhaft geladen");
40-
}
41-
$dat_details = explode("<!-- detailbereich -->", $dat_details[1]);
42-
43-
preg_match_all("/class=\"detail_row\">.*detail_label\">(.*)<\/d.*detail_div\">(.*)<\/div/siU", $dat_details[0], $matches);
44-
$betreff_gefunden = false;
45-
for ($i = 0; $i < count($matches[1]); $i++) switch (trim($matches[1][$i])) {
46-
case "Betreff:":
47-
$betreff_gefunden = true;
48-
$daten->betreff = $this->text_simple_clean($matches[2][$i]);
49-
break;
50-
case "Status:":
51-
$daten->status = $this->text_simple_clean($matches[2][$i]);
52-
break;
53-
case "Bearbeitung:":
54-
$daten->bearbeitung = trim(strip_tags($matches[2][$i]));
55-
break;
56-
}
57-
58-
if (!$betreff_gefunden) {
59-
RISTools::report_ris_parser_error("Fehler BAAntragParser", "Kein Betreff\n" . $html_details);
60-
throw new Exception("Betreff nicht gefunden");
61-
}
62-
63-
$dat_details = explode("<!-- bereichsbild, bereichsheadline, allgemeiner text -->", $html_details);
64-
$dat_details = explode("<!-- tabellenfuss -->", $dat_details[1]);
65-
66-
preg_match("/<h3.*>(.*) +(.*)<\/h3/siU", $dat_details[0], $matches);
67-
if (count($matches) == 3) {
68-
$daten->antrags_nr = Antrag::cleanAntragNr($matches[2]);;
69-
switch ($matches[1]) {
70-
case "BA-Antrags-Nummer:":
71-
$daten->typ = Antrag::$TYP_BA_ANTRAG;
72-
break;
73-
case "BV-Empfehlungs-Nummer:":
74-
$daten->typ = Antrag::$TYP_BUERGERVERSAMMLUNG_EMPFEHLUNG;
75-
break;
76-
default:
77-
RISTools::report_ris_parser_error("RIS: Unbekannter BA-Antrags-Typ: " . $antrag_id, $matches[1]);
78-
die();
79-
}
80-
} else {
81-
RISTools::report_ris_parser_error("RIS: Unbekannter BA-Antrags-Typ: " . $antrag_id, $dat_details[0]);
82-
die();
83-
}
84-
85-
preg_match_all("/<span class=\"itext\">(.*)<\/span.*detail_div_(left|right|left_long)\">(.*)<\/div/siU", $dat_details[0], $matches);
86-
for ($i = 0; $i < count($matches[1]); $i++) if ($matches[3][$i] != "&nbsp;") switch ($matches[1][$i]) {
87-
case "Zust&auml;ndiges Referat:":
88-
$daten->referat = $matches[3][$i];
89-
$ref = Referat::getByHtmlName($matches[3][$i]);
90-
$daten->referat_id = ($ref ? $ref->id : null);
91-
break;
92-
case "Gestellt am:":
93-
$daten->gestellt_am = $this->date_de2mysql($matches[3][$i]);
94-
break;
95-
case "Wahlperiode:":
96-
$daten->wahlperiode = $matches[3][$i];
97-
break;
98-
case "Bearbeitungsfrist:":
99-
$daten->bearbeitungsfrist = $this->date_de2mysql($matches[3][$i]);
100-
break;
101-
case "Registriert am:":
102-
$daten->registriert_am = $this->date_de2mysql($matches[3][$i]);
103-
break;
104-
case "Bezirksausschuss:":
105-
$daten->ba_nr = IntVal($matches[3][$i]);
106-
break;
107-
}
10+
$this->browserBasedDowloader = $browserBasedDowloader ?: new BrowserBasedDowloader();
11+
$this->curlBasedDownloader = $curlBasedDownloader ?: new CurlBasedDownloader();
12+
}
10813

109-
preg_match_all("/<li><span class=\"iconcontainer\">.*href=\"(.*)\"[^>]*title=\"([^\"]*)\">(.*)<\/a>/siU", $html_dokumente, $matches);
110-
for ($i = 0; $i < count($matches[1]); $i++) {
111-
$dokumente[] = [
112-
"url" => $matches[1][$i],
113-
"name" => $matches[3][$i],
114-
"name_title" => $matches[2][$i],
115-
];
116-
}
14+
public function parse(int $id): ?Antrag
15+
{
16+
if (SITE_CALL_MODE != "cron") echo "- Antrag $id\n";
11717

118-
/*
119-
$dat_ergebnisse = explode("<!-- tabellenkopf -->", $html_ergebnisse);
120-
$dat_ergebnisse = explode("<!-- tabellenfuss -->", $dat_ergebnisse[1]);
121-
preg_match_all("<tr>.*bghell tdborder\"><a.*\">(.*)<\/a>.*
122-
*/
18+
$html = $this->curlBasedDownloader->loadUrl(RIS_URL_PREFIX . 'antrag/detail/' . $id);
12319

124-
if (!($daten->ba_nr > 0)) {
125-
echo "BA-Antrag $antrag_id:" . "Keine BA-Angabe";
126-
$GLOBALS["RIS_PARSE_ERROR_LOG"][] = "Keine BA-Angabe (Antrag): $antrag_id";
20+
$parsed = AntragData::parseFromHtml($html);
21+
if ($parsed === null) {
12722
return null;
12823
}
12924

25+
$daten = new Antrag();
26+
$daten->id = $id;
27+
$daten->datum_letzte_aenderung = new CDbExpression('NOW()');
28+
$daten->typ = Antrag::TYP_BA_ANTRAG;
29+
$daten->referent = "";
30+
$daten->kurzinfo = "";
31+
$daten->initiatorInnen = implode(', ', $parsed->initiativeNamen);
32+
$daten->gestellt_von = implode(', ', $parsed->gestelltVon);
33+
$daten->betreff = $parsed->title;
34+
$daten->antrags_nr = $parsed->antragsnummer;
35+
$daten->status = $parsed->status;
36+
$daten->bearbeitung = $parsed->bearbeitungsart ?: '';
37+
$daten->antrag_typ = $parsed->typ ?? '';
38+
$daten->referat = $parsed->referatName ?? '';
39+
$daten->referat_id = $parsed->referatId ?? null; // keep NULL when no Referat was parsed, as the previous implementation did (an empty string is not a valid id)
40+
$daten->gestellt_am = $parsed->gestelltAm?->format('Y-m-d');
41+
$daten->wahlperiode = $parsed->wahlperiode;
42+
$daten->bearbeitungsfrist = $parsed->bearbeitungsfrist?->format('Y-m-d');
43+
$daten->erledigt_am = $parsed->erledigtAm?->format('Y-m-d');
44+
$daten->ba_nr = $parsed->baNr;
45+
13046
$aenderungen = "";
13147

13248
/** @var Antrag $alter_eintrag */
133-
$alter_eintrag = Antrag::model()->findByPk($antrag_id);
49+
$alter_eintrag = Antrag::model()->findByPk($id);
13450
$changed = true;
13551
if ($alter_eintrag) {
13652
$changed = false;
@@ -151,7 +67,7 @@ public function parse(int $antrag_id): ?Antrag
15167
if ($changed) {
15268
if ($aenderungen == "") $aenderungen = "Neu angelegt\n";
15369

154-
echo "BA-Antrag $antrag_id: " . $aenderungen;
70+
echo "BA-Antrag $id: " . $aenderungen;
15571

15672
if ($alter_eintrag) {
15773
$alter_eintrag->copyToHistory();
@@ -171,81 +87,54 @@ public function parse(int $antrag_id): ?Antrag
17187
$daten->resetPersonen();
17288
}
17389

174-
foreach ($dokumente as $dok) {
175-
$dok_typ = ($daten->typ == Antrag::$TYP_BA_ANTRAG ? Dokument::TYP_BA_ANTRAG : Dokument::TYP_BUERGERVERSAMMLUNG_EMPFEHLUNG);
176-
$aenderungen .= Dokument::create_if_necessary($dok_typ, $daten, $dok);
90+
foreach ($parsed->dokumentLinks as $dok) {
91+
$aenderungen .= Dokument::create_if_necessary(Dokument::TYP_BA_ANTRAG, $daten, $dok); // BA documents keep their own document type, as before the rewrite
17792
}
17893

17994
if ($aenderungen != "") {
18095
$aend = new RISAenderung();
18196
$aend->ris_id = $daten->id;
18297
$aend->ba_nr = $daten->ba_nr;
183-
$aend->typ = ($daten->typ == Antrag::$TYP_BA_ANTRAG ? RISAenderung::$TYP_BA_ANTRAG : RISAenderung::$TYP_BUERGERVERSAMMLUNG_EMPFEHLUNG);
98+
$aend->typ = ($daten->typ == Antrag::TYP_BA_ANTRAG ? RISAenderung::$TYP_BA_ANTRAG : RISAenderung::$TYP_BUERGERVERSAMMLUNG_EMPFEHLUNG);
18499
$aend->datum = new CDbExpression("NOW()");
185100
$aend->aenderungen = $aenderungen;
186101
$aend->save();
187102

188103
/** @var Antrag $antrag */
189-
$antrag = Antrag::model()->findByPk($antrag_id);
104+
$antrag = Antrag::model()->findByPk($id);
190105
$antrag->datum_letzte_aenderung = new CDbExpression('NOW()'); // Auch bei neuen Dokumenten
191106
$antrag->save();
192107
$antrag->rebuildVorgaenge();
193108
}
194109

195-
return $antrag;
196-
}
197-
198-
public function parseSeite(int $seite, int $first): array
199-
{
200-
if (SITE_CALL_MODE != "cron") echo "BA-Anträge Seite $seite\n";
201-
$text = RISTools::load_file(RIS_BA_BASE_URL . "ba_antraege.jsp?Start=$seite");
202-
203-
$txt = explode("<!-- tabellenkopf -->", $text);
204-
if (!isset($txt[1])) return [];
205-
206-
$txt = explode("<div class=\"ergebnisfuss\">", $txt[1]);
207-
preg_match_all("/ba_antraege_details\.jsp\?Id=([0-9]+)[\"'& ]/siU", $txt[0], $matches);
208-
209-
if ($first && count($matches[1]) > 0) {
210-
RISTools::report_ris_parser_error("BA-Anträge VOLL", "Erste Seite voll: $seite (" . RIS_BA_BASE_URL . "ba_antraege.jsp?Start=$seite)");
211-
}
212-
213-
for ($i = count($matches[1]) - 1; $i >= 0; $i--) try {
214-
$this->parse($matches[1][$i]);
215-
} catch (Exception $e) {
216-
echo " EXCEPTION! " . $e . "\n";
217-
}
218-
return $matches[1];
110+
return $daten;
219111
}
220112

221113
public function parseAll(): void
222114
{
223-
$anz = static::$MAX_OFFSET;
224-
$first = true;
225-
//$anz = 800;
226-
for ($i = $anz; $i >= 0; $i -= 10) {
227-
if (SITE_CALL_MODE != "cron") echo ($anz - $i) . " / $anz\n";
228-
$this->parseSeite($i, $first);
229-
$first = false;
115+
for ($year = 2020; $year <= (int) date('Y'); $year++) { // 'Y' is the four-digit year; 'y' yields e.g. 24, so the loop never ran
116+
for ($month = 1; $month <= 12; $month++) {
117+
echo "Parsing: $month/$year\n";
118+
$this->parseMonth($year, $month);
119+
}
230120
}
231121
}
232122

233123
public function parseUpdate(): void
234124
{
235-
echo "Updates: BA-Anträge\n";
236-
$loaded_ids = [];
125+
echo "Updates: BA-Anträge (3 Monate)\n";
237126

238-
$anz = static::$MAX_OFFSET_UPDATE;
239-
for ($i = $anz; $i >= 0; $i -= 10) {
240-
$ids = $this->parseSeite($i, false);
241-
$loaded_ids = array_merge($loaded_ids, array_map("IntVal", $ids));
127+
$loaded_ids = [];
128+
for ($i = -3; $i <= 0; $i++) { // count up from three months ago to the current month (">= 0" was false on entry)
129+
$month = (new DateTime('first day of this month'))->modify($i . ' month'); // anchor on day 1 so the month offset cannot overflow on days 29-31
130+
$loaded_ids = array_merge($loaded_ids, array_map(static fn ($entry): int => intval($entry->id), $this->parseMonth(intval($month->format('Y')), intval($month->format('m'))))); // parseMonth() returns list entries; the NOT IN filter below needs plain integer ids
242131
}
243132

244133
$crit = new CDbCriteria();
245-
$crit->condition = "typ='" . addslashes(Antrag::$TYP_BA_ANTRAG) . "' AND status != 'erledigt' AND gestellt_am > NOW() - INTERVAL 2 YEAR AND ((TO_DAYS(bearbeitungsfrist)-TO_DAYS(CURRENT_DATE()) < 14 AND TO_DAYS(bearbeitungsfrist)-TO_DAYS(CURRENT_DATE()) > -14) OR ((TO_DAYS(CURRENT_DATE()) - TO_DAYS(gestellt_am)) % 3) = 0)";
134+
$crit->condition = "typ='" . addslashes(Antrag::TYP_BA_ANTRAG) . "' AND status != 'erledigt' AND gestellt_am > NOW() - INTERVAL 2 YEAR AND ((TO_DAYS(bearbeitungsfrist)-TO_DAYS(CURRENT_DATE()) < 14 AND TO_DAYS(bearbeitungsfrist)-TO_DAYS(CURRENT_DATE()) > -14) OR ((TO_DAYS(CURRENT_DATE()) - TO_DAYS(gestellt_am)) % 3) = 0)";
246135
if (count($loaded_ids) > 0) $crit->addNotInCondition("id", $loaded_ids);
247136

248-
/** @var array|Antrag[] $antraege */
137+
/** @var Antrag[] $antraege */
249138
$antraege = Antrag::model()->findAll($crit);
250139
foreach ($antraege as $antrag) $this->parse($antrag->id);
251140
}
@@ -255,4 +144,26 @@ public function parseQuickUpdate(): void
255144

256145
}
257146

147+
/**
148+
* @return StadtratsantragListEntry[]
149+
* @throws ParsingException
150+
*/
151+
public function parseMonth(int $year, int $month): array
152+
{
153+
$from = new \DateTime($year . '-' . $month . '-1');
154+
$to = (clone $from)->modify('last day of this month');
155+
156+
$html = $this->browserBasedDowloader->downloadDocumentTypeListForPeriod(BrowserBasedDowloader::DOCUMENT_BA_ANTRAG, $from, $to);
157+
158+
$parsedObjects = StadtratsantragListEntry::parseHtmlList($html);
159+
160+
161+
echo count($parsedObjects) . " BA-Anträge gefunden\n";
162+
163+
foreach ($parsedObjects as $object) {
164+
$this->parse($object->id);
165+
}
166+
167+
return $parsedObjects;
168+
}
258169
}

protected/RISParser/BAInitiativeParser.php

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ public function parse(int $antrag_id): ?Antrag
2020
$daten = new Antrag();
2121
$daten->id = $antrag_id;
2222
$daten->datum_letzte_aenderung = new CDbExpression('NOW()');
23-
$daten->typ = Antrag::$TYP_BA_INITIATIVE;
23+
$daten->typ = Antrag::TYP_BA_INITIATIVE;
2424

2525
$dokumente = [];
2626
//$ergebnisse = array();

protected/RISParser/StadtratsantragParser.php

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -17,15 +17,15 @@ public function parse(int $id): ?Antrag
1717

1818
$html = $this->curlBasedDownloader->loadUrl(RIS_URL_PREFIX . 'antrag/detail/' . $id);
1919

20-
$parsed = StadtratsantragData::parseFromHtml($html);
20+
$parsed = AntragData::parseFromHtml($html);
2121
if ($parsed === null) {
2222
return null;
2323
}
2424

2525
$daten = new Antrag();
2626
$daten->id = $id;
2727
$daten->datum_letzte_aenderung = new CDbExpression('NOW()');
28-
$daten->typ = Antrag::$TYP_STADTRAT_ANTRAG;
28+
$daten->typ = Antrag::TYP_STADTRAT_ANTRAG;
2929
$daten->referent = "";
3030
$daten->kurzinfo = "";
3131
$daten->initiatorInnen = implode(', ', $parsed->initiativeNamen);
@@ -138,10 +138,10 @@ public function parseUpdate(): void
138138
}
139139

140140
$crit = new CDbCriteria();
141-
$crit->condition = "typ='" . addslashes(Antrag::$TYP_STADTRAT_ANTRAG) . "' AND status != 'erledigt' AND gestellt_am > NOW() - INTERVAL 2 YEAR AND ((TO_DAYS(bearbeitungsfrist)-TO_DAYS(CURRENT_DATE()) < 14 AND TO_DAYS(bearbeitungsfrist)-TO_DAYS(CURRENT_DATE()) > -14) OR ((TO_DAYS(CURRENT_DATE()) - TO_DAYS(gestellt_am)) % 3) = 0)";
141+
$crit->condition = "typ='" . addslashes(Antrag::TYP_STADTRAT_ANTRAG) . "' AND status != 'erledigt' AND gestellt_am > NOW() - INTERVAL 2 YEAR AND ((TO_DAYS(bearbeitungsfrist)-TO_DAYS(CURRENT_DATE()) < 14 AND TO_DAYS(bearbeitungsfrist)-TO_DAYS(CURRENT_DATE()) > -14) OR ((TO_DAYS(CURRENT_DATE()) - TO_DAYS(gestellt_am)) % 3) = 0)";
142142
if (count($loaded_ids) > 0) $crit->addNotInCondition("id", $loaded_ids);
143143

144-
/** @var array|Antrag[] $antraege */
144+
/** @var Antrag[] $antraege */
145145
$antraege = Antrag::model()->findAll($crit);
146146
foreach ($antraege as $antrag) $this->parse($antrag->id);
147147
}

protected/RISParser/StadtratsvorlageParser.php

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@ public function parse(int $id): ?Antrag
4747
$daten = new Antrag();
4848
$daten->id = $id;
4949
$daten->datum_letzte_aenderung = new CDbExpression('NOW()');
50-
$daten->typ = Antrag::$TYP_STADTRAT_VORLAGE;
50+
$daten->typ = Antrag::TYP_STADTRAT_VORLAGE;
5151
$daten->betreff = $parsed->title;
5252
$daten->status = $parsed->status;
5353
$daten->gestellt_von = "";
@@ -208,7 +208,7 @@ public function parseUpdate(): void
208208
}
209209

210210
$crit = new CDbCriteria();
211-
$crit->condition = "typ='" . addslashes(Antrag::$TYP_STADTRAT_VORLAGE) . "' AND status NOT IN ('Endgültiger Beschluss', 'abgeschlossen') AND gestellt_am > NOW() - INTERVAL 2 YEAR";
211+
$crit->condition = "typ='" . addslashes(Antrag::TYP_STADTRAT_VORLAGE) . "' AND status NOT IN ('Endgültiger Beschluss', 'abgeschlossen') AND gestellt_am > NOW() - INTERVAL 2 YEAR";
212212
if (count($loaded_ids) > 0) $crit->addNotInCondition("id", $loaded_ids);
213213

214214
/** @var array|Antrag[] $antraege */

0 commit comments

Comments
 (0)