@@ -485,37 +485,45 @@ def download_image(root_url: str, link: str) -> Optional[PIL.Image.Image]:
485485 print (link )
486486 return None
487487
488- def _find_link_favicon (soup , iconformat ):
488+ def _find_link_favicon (soup , iconformat , url ):
489489 items = soup .find_all ("link" , {"rel" : iconformat })
490490 for item in items :
491491 link = item .get ("href" )
492492 if link :
493493 yield link
494494
495- def _find_meta_content (soup , iconformat ):
495+ def _find_meta_content (soup , iconformat , url ):
496496 item = soup .find ("meta" , {"name" : iconformat })
497497 if not item :
498498 return
499499 link = item .get ("content" )
500500 if link :
501501 yield link
502502
503- def _find_property (soup , iconformat ):
503+ def _find_property (soup , iconformat , url ):
504504 items = soup .find_all ("meta" , {"property" : iconformat })
505505 for item in items :
506506 link = item .get ("content" )
507507 if link :
508508 yield link
509509
510- def _find_url (_soup , iconformat ):
510+ def _find_url (_soup , iconformat , url ):
511511 yield iconformat
512512
513+ def _find_google_api_favicon (_soup , iconformat , url ):
514+ url = urllib .parse .quote (url , safe = '' )
515+ #response = requests.get("https://www.google.com/s2/favicons?sz=32&domain=%s" % url, timeout=3)
516+ #link = response.url
517+ link = "https://www.google.com/s2/favicons?sz=32&domain=%s" % url
518+ yield link
519+
513520
514521def download_favicon (url ):
515522 images = []
516523 url = normalize_url (url )
517524 (scheme , netloc , path , _ , _ , _ ) = urllib .parse .urlparse (url )
518525 root_url = "%s://%s" % (scheme , netloc )
526+ api_url = "%s%s" % (netloc , path )
519527
520528 # Check HTML and /favicon.ico
521529 try :
@@ -534,11 +542,12 @@ def download_favicon(url):
534542 ("msapplication-square70x70logo" , _find_meta_content ),
535543 ("og:image" , _find_property ),
536544 ("favicon.ico" , _find_url ),
545+ ("google-api" , _find_google_api_favicon ),
537546 ]
538547
539548 # icons defined in the HTML
540549 for (iconformat , getter ) in iconformats :
541- for link in getter (soup , iconformat ):
550+ for link in getter (soup , iconformat , api_url ):
542551 image = download_image (root_url , link )
543552 if image is not None :
544553 t = tempfile .NamedTemporaryFile (suffix = ".png" , delete = False )
0 commit comments