22
33class BAAntragParser extends RISParser
44{
5- private static $ MAX_OFFSET = 25000 ;
6- private static $ MAX_OFFSET_UPDATE = 200 ;
5+ private BrowserBasedDowloader $ browserBasedDowloader ;
6+ private CurlBasedDownloader $ curlBasedDownloader ;
77
8- public function parse ( int $ antrag_id ): ? Antrag
/**
 * Sets up the two downloaders this parser works with.
 *
 * Both arguments are optional: when one is omitted (or null), a default
 * instance of the corresponding downloader class is created instead.
 *
 * @param BrowserBasedDowloader|null $browserBasedDowloader downloader used by parseMonth() to fetch the list page
 * @param CurlBasedDownloader|null   $curlBasedDownloader   downloader used by parse() to fetch a single detail page
 */
public function __construct(?BrowserBasedDowloader $browserBasedDowloader = null, ?CurlBasedDownloader $curlBasedDownloader = null)
{
    if ($browserBasedDowloader === null) {
        $browserBasedDowloader = new BrowserBasedDowloader();
    }
    if ($curlBasedDownloader === null) {
        $curlBasedDownloader = new CurlBasedDownloader();
    }

    $this->browserBasedDowloader = $browserBasedDowloader;
    $this->curlBasedDownloader   = $curlBasedDownloader;
}
10813
109- preg_match_all ("/<li><span class= \"iconcontainer \">.*href= \"(.*) \"[^>]*title= \"([^ \"]*) \">(.*)<\/a>/siU " , $ html_dokumente , $ matches );
110- for ($ i = 0 ; $ i < count ($ matches [1 ]); $ i ++) {
111- $ dokumente [] = [
112- "url " => $ matches [1 ][$ i ],
113- "name " => $ matches [3 ][$ i ],
114- "name_title " => $ matches [2 ][$ i ],
115- ];
116- }
14+ public function parse (int $ id ): ?Antrag
15+ {
16+ if (SITE_CALL_MODE != "cron " ) echo "- Antrag $ id \n" ;
11717
118- /*
119- $dat_ergebnisse = explode("<!-- tabellenkopf -->", $html_ergebnisse);
120- $dat_ergebnisse = explode("<!-- tabellenfuss -->", $dat_ergebnisse[1]);
121- preg_match_all("<tr>.*bghell tdborder\"><a.*\">(.*)<\/a>.*
122- */
18+ $ html = $ this ->curlBasedDownloader ->loadUrl (RIS_URL_PREFIX . 'antrag/detail/ ' . $ id );
12319
124- if (!($ daten ->ba_nr > 0 )) {
125- echo "BA-Antrag $ antrag_id: " . "Keine BA-Angabe " ;
126- $ GLOBALS ["RIS_PARSE_ERROR_LOG " ][] = "Keine BA-Angabe (Antrag): $ antrag_id " ;
20+ $ parsed = AntragData::parseFromHtml ($ html );
21+ if ($ parsed === null ) {
12722 return null ;
12823 }
12924
25+ $ daten = new Antrag ();
26+ $ daten ->id = $ id ;
27+ $ daten ->datum_letzte_aenderung = new CDbExpression ('NOW() ' );
28+ $ daten ->typ = Antrag::TYP_BA_ANTRAG ;
29+ $ daten ->referent = "" ;
30+ $ daten ->kurzinfo = "" ;
31+ $ daten ->initiatorInnen = implode (', ' , $ parsed ->initiativeNamen );
32+ $ daten ->gestellt_von = implode (', ' , $ parsed ->gestelltVon );
33+ $ daten ->betreff = $ parsed ->title ;
34+ $ daten ->antrags_nr = $ parsed ->antragsnummer ;
35+ $ daten ->status = $ parsed ->status ;
36+ $ daten ->bearbeitung = $ parsed ->bearbeitungsart ?: '' ;
37+ $ daten ->antrag_typ = $ parsed ->typ ?? '' ;
38+ $ daten ->referat = $ parsed ->referatName ?? '' ;
39+ $ daten ->referat_id = $ parsed ->referatId ?? '' ;
40+ $ daten ->gestellt_am = $ parsed ->gestelltAm ?->format('Y-m-d ' );
41+ $ daten ->wahlperiode = $ parsed ->wahlperiode ;
42+ $ daten ->bearbeitungsfrist = $ parsed ->bearbeitungsfrist ?->format('Y-m-d ' );
43+ $ daten ->erledigt_am = $ parsed ->erledigtAm ?->format('Y-m-d ' );
44+ $ daten ->ba_nr = $ parsed ->baNr ;
45+
13046 $ aenderungen = "" ;
13147
13248 /** @var Antrag $alter_eintrag */
133- $ alter_eintrag = Antrag::model ()->findByPk ($ antrag_id );
49+ $ alter_eintrag = Antrag::model ()->findByPk ($ id );
13450 $ changed = true ;
13551 if ($ alter_eintrag ) {
13652 $ changed = false ;
@@ -151,7 +67,7 @@ public function parse(int $antrag_id): ?Antrag
15167 if ($ changed ) {
15268 if ($ aenderungen == "" ) $ aenderungen = "Neu angelegt \n" ;
15369
154- echo "BA-Antrag $ antrag_id : " . $ aenderungen ;
70+ echo "BA-Antrag $ id : " . $ aenderungen ;
15571
15672 if ($ alter_eintrag ) {
15773 $ alter_eintrag ->copyToHistory ();
@@ -171,81 +87,54 @@ public function parse(int $antrag_id): ?Antrag
17187 $ daten ->resetPersonen ();
17288 }
17389
174- foreach ($ dokumente as $ dok ) {
175- $ dok_typ = ($ daten ->typ == Antrag::$ TYP_BA_ANTRAG ? Dokument::TYP_BA_ANTRAG : Dokument::TYP_BUERGERVERSAMMLUNG_EMPFEHLUNG );
176- $ aenderungen .= Dokument::create_if_necessary ($ dok_typ , $ daten , $ dok );
90+ foreach ($ parsed ->dokumentLinks as $ dok ) {
91+ $ aenderungen .= Dokument::create_if_necessary (Dokument::TYP_STADTRAT_ANTRAG , $ daten , $ dok );
17792 }
17893
17994 if ($ aenderungen != "" ) {
18095 $ aend = new RISAenderung ();
18196 $ aend ->ris_id = $ daten ->id ;
18297 $ aend ->ba_nr = $ daten ->ba_nr ;
183- $ aend ->typ = ($ daten ->typ == Antrag::$ TYP_BA_ANTRAG ? RISAenderung::$ TYP_BA_ANTRAG : RISAenderung::$ TYP_BUERGERVERSAMMLUNG_EMPFEHLUNG );
98+ $ aend ->typ = ($ daten ->typ == Antrag::TYP_BA_ANTRAG ? RISAenderung::$ TYP_BA_ANTRAG : RISAenderung::$ TYP_BUERGERVERSAMMLUNG_EMPFEHLUNG );
18499 $ aend ->datum = new CDbExpression ("NOW() " );
185100 $ aend ->aenderungen = $ aenderungen ;
186101 $ aend ->save ();
187102
188103 /** @var Antrag $antrag */
189- $ antrag = Antrag::model ()->findByPk ($ antrag_id );
104+ $ antrag = Antrag::model ()->findByPk ($ id );
190105 $ antrag ->datum_letzte_aenderung = new CDbExpression ('NOW() ' ); // Auch bei neuen Dokumenten
191106 $ antrag ->save ();
192107 $ antrag ->rebuildVorgaenge ();
193108 }
194109
195- return $ antrag ;
196- }
197-
198- public function parseSeite (int $ seite , int $ first ): array
199- {
200- if (SITE_CALL_MODE != "cron " ) echo "BA-Anträge Seite $ seite \n" ;
201- $ text = RISTools::load_file (RIS_BA_BASE_URL . "ba_antraege.jsp?Start= $ seite " );
202-
203- $ txt = explode ("<!-- tabellenkopf --> " , $ text );
204- if (!isset ($ txt [1 ])) return [];
205-
206- $ txt = explode ("<div class= \"ergebnisfuss \"> " , $ txt [1 ]);
207- preg_match_all ("/ba_antraege_details\.jsp\?Id=([0-9]+)[ \"'& ]/siU " , $ txt [0 ], $ matches );
208-
209- if ($ first && count ($ matches [1 ]) > 0 ) {
210- RISTools::report_ris_parser_error ("BA-Anträge VOLL " , "Erste Seite voll: $ seite ( " . RIS_BA_BASE_URL . "ba_antraege.jsp?Start= $ seite) " );
211- }
212-
213- for ($ i = count ($ matches [1 ]) - 1 ; $ i >= 0 ; $ i --) try {
214- $ this ->parse ($ matches [1 ][$ i ]);
215- } catch (Exception $ e ) {
216- echo " EXCEPTION! " . $ e . "\n" ;
217- }
218- return $ matches [1 ];
110+ return $ daten ;
219111 }
220112
/**
 * Parses every month from January 2020 through December of the current year.
 *
 * Each month is delegated to parseMonth(); a progress line is echoed before
 * every month.
 */
public function parseAll(): void
{
    // Must be the four-digit year ('Y'). The two-digit 'y' yields e.g. 24,
    // which makes "2020 <= 24" false so that no month would ever be parsed.
    $currentYear = (int) date('Y');

    for ($year = 2020; $year <= $currentYear; $year++) {
        for ($month = 1; $month <= 12; $month++) {
            echo "Parsing: $month/$year\n";
            $this->parseMonth($year, $month);
        }
    }
}
232122
/**
 * Incremental update of BA-Anträge.
 *
 * First re-parses the list pages of the current month and the three
 * preceding months, then re-parses older, still open Anträge selected by
 * the SQL condition below, skipping everything already handled in the
 * first step.
 */
public function parseUpdate(): void
{
    echo "Updates: BA-Anträge (3 Monate)\n";

    $loaded_ids = [];
    // Offsets -3..0 relative to the current month. The loop condition has to
    // be "<= 0"; with ">= 0" the loop body is never entered when starting at -3.
    for ($i = -3; $i <= 0; $i++) {
        // Anchor on the first of the month before shifting: "-N month" applied
        // to e.g. the 31st overflows into the following month.
        $month   = (new DateTime('first day of this month'))->modify($i . ' month');
        $entries = $this->parseMonth(intval($month->format('Y')), intval($month->format('m')));

        // parseMonth() returns list-entry objects; the NOT IN condition below
        // needs their plain ids.
        foreach ($entries as $entry) {
            $loaded_ids[] = intval($entry->id);
        }
    }

    $crit = new CDbCriteria();
    $crit->condition = "typ='" . addslashes(Antrag::TYP_BA_ANTRAG) . "' AND status != 'erledigt' AND gestellt_am > NOW() - INTERVAL 2 YEAR AND ((TO_DAYS(bearbeitungsfrist)-TO_DAYS(CURRENT_DATE()) < 14 AND TO_DAYS(bearbeitungsfrist)-TO_DAYS(CURRENT_DATE()) > -14) OR ((TO_DAYS(CURRENT_DATE()) - TO_DAYS(gestellt_am)) % 3) = 0)";
    if (count($loaded_ids) > 0) {
        $crit->addNotInCondition("id", $loaded_ids);
    }

    /** @var Antrag[] $antraege */
    $antraege = Antrag::model()->findAll($crit);
    foreach ($antraege as $antrag) {
        $this->parse($antrag->id);
    }
}
@@ -255,4 +144,26 @@ public function parseQuickUpdate(): void
255144
256145 }
257146
/**
 * Downloads the BA-Antrag list for one calendar month and parses every
 * entry found on it.
 *
 * The period runs from the first to the last day of the given month. The
 * number of entries found is echoed, then parse() is called for each
 * entry's id.
 *
 * @param int $year  year of the month to process
 * @param int $month month number of the month to process
 *
 * @return StadtratsantragListEntry[] the entries parsed from the list page
 * @throws ParsingException
 */
public function parseMonth(int $year, int $month): array
{
    $periodStart = new \DateTime(sprintf('%d-%d-1', $year, $month));
    $periodEnd   = clone $periodStart;
    $periodEnd->modify('last day of this month');

    $listHtml = $this->browserBasedDowloader->downloadDocumentTypeListForPeriod(
        BrowserBasedDowloader::DOCUMENT_BA_ANTRAG,
        $periodStart,
        $periodEnd
    );
    $entries = StadtratsantragListEntry::parseHtmlList($listHtml);

    echo count($entries) . " BA-Anträge gefunden\n";

    foreach ($entries as $entry) {
        $this->parse($entry->id);
    }

    return $entries;
}
258169}
0 commit comments