Renamed extension.developer to extension.offeredby and introduced actual extension.developer (capturing the information in the developer DIV such as the privacy policy).

This commit is contained in:
Achim D. Brucker 2020-03-02 10:30:57 +00:00
parent 77895218e4
commit dc0b2b6caa
8 changed files with 15 additions and 7 deletions

View File

@ -146,8 +146,13 @@ def parse_and_insert_overview(ext_id, date, datepath, con):
full_description = str(
description_parent.parent) if description_parent else None
developer_parent = doc.find(
offeredby_parent = doc.find(
class_=lambda cls: cls and "e-f-Me" in cls)
offeredby = "".join([str(x) for x in offeredby_parent.contents
]) if offeredby_parent else None
developer_parent = doc.find(
class_=lambda cls: cls and "C-b-p-rc-D-J" in cls)
developer = "".join([str(x) for x in developer_parent.contents
]) if developer_parent else None
@ -163,6 +168,7 @@ def parse_and_insert_overview(ext_id, date, datepath, con):
contents)
itemcategory = match.group(1) if match else None
con.insert(
"extension",
extid=ext_id,
@ -174,6 +180,7 @@ def parse_and_insert_overview(ext_id, date, datepath, con):
rating=rating,
ratingcount=rating_count,
fulldescription=full_description,
offeredby=offeredby,
developer=developer,
itemcategory=itemcategory,
crx_etag=etag,

View File

@ -30,6 +30,7 @@ CREATE TABLE `extension` (
`rating` double DEFAULT NULL,
`ratingcount` int(11) DEFAULT NULL,
`fulldescription` text /*!100301 COMPRESSED*/ COLLATE utf8mb4_unicode_ci DEFAULT NULL,
`offeredby` text /*!100301 COMPRESSED*/ COLLATE utf8mb4_unicode_ci DEFAULT NULL,
`developer` text /*!100301 COMPRESSED*/ COLLATE utf8mb4_unicode_ci DEFAULT NULL,
`itemcategory` text /*!100301 COMPRESSED*/ COLLATE utf8mb4_unicode_ci DEFAULT NULL,
`crx_etag` varchar(44) COLLATE utf8mb4_unicode_ci DEFAULT NULL,

View File

@ -27,7 +27,7 @@
/*!50001 SET collation_connection = utf8_general_ci */;
/*!50001 CREATE ALGORITHM=UNDEFINED */
/*!50013 DEFINER=`writer`@`%` SQL SECURITY DEFINER */
/*!50001 VIEW `extension_most_recent` AS select `e3`.`extid` AS `extid`,`e3`.`date` AS `date`,`e3`.`name` AS `name`,`e3`.`version` AS `version`,`e3`.`description` AS `description`,`e3`.`downloads` AS `downloads`,`e3`.`rating` AS `rating`,`e3`.`ratingcount` AS `ratingcount`,`e3`.`fulldescription` AS `fulldescription`,`e3`.`developer` AS `developer`,`e3`.`itemcategory` AS `itemcategory`,`e3`.`crx_etag` AS `crx_etag`,`e3`.`lastupdated` AS `lastupdated` from (((select `e1`.`extid` AS `extid`,max(`e1`.`date`) AS `date` from `extensions`.`extension` `e1` group by `e1`.`extid`)) `e2` join `extensions`.`extension` `e3` on(`e2`.`extid` = `e3`.`extid` and `e2`.`date` = `e3`.`date`)) */;
/*!50001 VIEW `extension_most_recent` AS select `e3`.`extid` AS `extid`,`e3`.`date` AS `date`,`e3`.`name` AS `name`,`e3`.`version` AS `version`,`e3`.`description` AS `description`,`e3`.`downloads` AS `downloads`,`e3`.`rating` AS `rating`,`e3`.`ratingcount` AS `ratingcount`,`e3`.`fulldescription` AS `fulldescription`,`e3`.`offeredby` AS `offeredby`,`e3`.`developer` AS `developer`,`e3`.`itemcategory` AS `itemcategory`,`e3`.`crx_etag` AS `crx_etag`,`e3`.`lastupdated` AS `lastupdated` from (((select `e1`.`extid` AS `extid`,max(`e1`.`date`) AS `date` from `extensions`.`extension` `e1` group by `e1`.`extid`)) `e2` join `extensions`.`extension` `e3` on(`e2`.`extid` = `e3`.`extid` and `e2`.`date` = `e3`.`date`)) */;
/*!50001 SET character_set_client = @saved_cs_client */;
/*!50001 SET character_set_results = @saved_cs_results */;
/*!50001 SET collation_connection = @saved_col_connection */;

View File

@ -27,7 +27,7 @@
/*!50001 SET collation_connection = utf8_general_ci */;
/*!50001 CREATE ALGORITHM=UNDEFINED */
/*!50013 DEFINER=`writer`@`%` SQL SECURITY DEFINER */
/*!50001 VIEW `extension_most_recent_small` AS select `e3`.`extid` AS `extid`,`e3`.`date` AS `date`,`e3`.`name` AS `name`,`e3`.`version` AS `version`,`e3`.`description` AS `description`,`e3`.`downloads` AS `downloads`,`e3`.`rating` AS `rating`,`e3`.`ratingcount` AS `ratingcount`,`e3`.`fulldescription` AS `fulldescription`,`e3`.`developer` AS `developer`,`e3`.`itemcategory` AS `itemcategory`,`e3`.`crx_etag` AS `crx_etag`,`e3`.`lastupdated` AS `lastupdated` from (((select `e1`.`extid` AS `extid`,max(`e1`.`date`) AS `date` from `extensions`.`extension` `e1` where `e1`.`extid` like 'aa%' group by `e1`.`extid`)) `e2` join `extensions`.`extension` `e3` on(`e2`.`extid` = `e3`.`extid` and `e2`.`date` = `e3`.`date`)) */;
/*!50001 VIEW `extension_most_recent_small` AS select `e3`.`extid` AS `extid`,`e3`.`date` AS `date`,`e3`.`name` AS `name`,`e3`.`version` AS `version`,`e3`.`description` AS `description`,`e3`.`downloads` AS `downloads`,`e3`.`rating` AS `rating`,`e3`.`ratingcount` AS `ratingcount`,`e3`.`fulldescription` AS `fulldescription`,`e3`.`offeredby` AS `offeredby`,`e3`.`developer` AS `developer`,`e3`.`itemcategory` AS `itemcategory`,`e3`.`crx_etag` AS `crx_etag`,`e3`.`lastupdated` AS `lastupdated` from (((select `e1`.`extid` AS `extid`,max(`e1`.`date`) AS `date` from `extensions`.`extension` `e1` where `e1`.`extid` like 'aa%' group by `e1`.`extid`)) `e2` join `extensions`.`extension` `e3` on(`e2`.`extid` = `e3`.`extid` and `e2`.`date` = `e3`.`date`)) */;
/*!50001 SET character_set_client = @saved_cs_client */;
/*!50001 SET character_set_results = @saved_cs_results */;
/*!50001 SET collation_connection = @saved_col_connection */;

View File

@ -30,7 +30,7 @@ create function until_date returns datetime NO SQL DEERMINISTIC return @until_da
/*!50001 SET collation_connection = utf8_general_ci */;
/*!50001 CREATE ALGORITHM=UNDEFINED */
/*!50013 DEFINER=`root`@`localhost` SQL SECURITY DEFINER */
/*!50001 VIEW `extension_most_recent_until_date` AS select `e1`.`extid` AS `extid`,`e1`.`date` AS `date`,`extensions`.`extension`.`name` AS `name`,`extensions`.`extension`.`version` AS `version`,`extensions`.`extension`.`description` AS `description`,`extensions`.`extension`.`downloads` AS `downloads`,`extensions`.`extension`.`rating` AS `rating`,`extensions`.`extension`.`ratingcount` AS `ratingcount`,`extensions`.`extension`.`fulldescription` AS `fulldescription`,`extensions`.`extension`.`developer` AS `developer`,`extensions`.`extension`.`itemcategory` AS `itemcategory`,`extensions`.`extension`.`crx_etag` AS `crx_etag`,`extensions`.`extension`.`lastupdated` AS `lastupdated`,`extensions`.`extension`.`last_modified` AS `last_modified` from (((select `extensions`.`extension`.`extid` AS `extid`,max(`extensions`.`extension`.`date`) AS `date` from `extensions`.`extension` where `extensions`.`extension`.`date` <= `until_date`() group by `extensions`.`extension`.`extid`)) `e1` join `extensions`.`extension` on(`e1`.`extid` = `extensions`.`extension`.`extid` and `e1`.`date` = `extensions`.`extension`.`date`)) */;
/*!50001 VIEW `extension_most_recent_until_date` AS select `e1`.`extid` AS `extid`,`e1`.`date` AS `date`,`extensions`.`extension`.`name` AS `name`,`extensions`.`extension`.`version` AS `version`,`extensions`.`extension`.`description` AS `description`,`extensions`.`extension`.`downloads` AS `downloads`,`extensions`.`extension`.`rating` AS `rating`,`extensions`.`extension`.`ratingcount` AS `ratingcount`,`extensions`.`extension`.`fulldescription` AS `fulldescription`,`extensions`.`extension`.`offeredby` AS `offeredby`, `extensions`.`extension`.`developer` AS `developer`,`extensions`.`extension`.`itemcategory` AS `itemcategory`,`extensions`.`extension`.`crx_etag` AS `crx_etag`,`extensions`.`extension`.`lastupdated` AS `lastupdated`,`extensions`.`extension`.`last_modified` AS `last_modified` from (((select `extensions`.`extension`.`extid` AS `extid`,max(`extensions`.`extension`.`date`) AS `date` from `extensions`.`extension` where `extensions`.`extension`.`date` <= `until_date`() group by `extensions`.`extension`.`extid`)) `e1` join `extensions`.`extension` on(`e1`.`extid` = `extensions`.`extension`.`extid` and `e1`.`date` = `extensions`.`extension`.`date`)) */;
/*!50001 SET character_set_client = @saved_cs_client */;
/*!50001 SET character_set_results = @saved_cs_results */;
/*!50001 SET collation_connection = @saved_col_connection */;

View File

@ -27,7 +27,7 @@
/*!50001 SET collation_connection = utf8_general_ci */;
/*!50001 CREATE ALGORITHM=UNDEFINED */
/*!50013 DEFINER=`root`@`localhost` SQL SECURITY DEFINER */
/*!50001 VIEW `extension_second_most_recent` AS select `e1`.`extid` AS `extid`,`e1`.`date` AS `date`,`extensions`.`extension`.`name` AS `name`,`extensions`.`extension`.`version` AS `version`,`extensions`.`extension`.`description` AS `description`,`extensions`.`extension`.`downloads` AS `downloads`,`extensions`.`extension`.`rating` AS `rating`,`extensions`.`extension`.`ratingcount` AS `ratingcount`,`extensions`.`extension`.`fulldescription` AS `fulldescription`,`extensions`.`extension`.`developer` AS `developer`,`extensions`.`extension`.`itemcategory` AS `itemcategory`,`extensions`.`extension`.`crx_etag` AS `crx_etag`,`extensions`.`extension`.`lastupdated` AS `lastupdated`,`extensions`.`extension`.`last_modified` AS `last_modified` from (((select `extensions`.`extension`.`extid` AS `extid`,max(`extensions`.`extension`.`date`) AS `date` from `extensions`.`extension` where !((`extensions`.`extension`.`extid`,`extensions`.`extension`.`date`) in (select `extensions`.`extension`.`extid`,max(`extensions`.`extension`.`date`) AS `date` from `extensions`.`extension` group by `extensions`.`extension`.`extid`)) group by `extensions`.`extension`.`extid`)) `e1` join `extensions`.`extension` on(`e1`.`extid` = `extensions`.`extension`.`extid` and `e1`.`date` = `extensions`.`extension`.`date`)) */;
/*!50001 VIEW `extension_second_most_recent` AS select `e1`.`extid` AS `extid`,`e1`.`date` AS `date`,`extensions`.`extension`.`name` AS `name`,`extensions`.`extension`.`version` AS `version`,`extensions`.`extension`.`description` AS `description`,`extensions`.`extension`.`downloads` AS `downloads`,`extensions`.`extension`.`rating` AS `rating`,`extensions`.`extension`.`ratingcount` AS `ratingcount`,`extensions`.`extension`.`fulldescription` AS `fulldescription`,`extensions`.`extension`.`offeredby` AS `offeredby`,`extensions`.`extension`.`developer` AS `developer`,`extensions`.`extension`.`itemcategory` AS `itemcategory`,`extensions`.`extension`.`crx_etag` AS `crx_etag`,`extensions`.`extension`.`lastupdated` AS `lastupdated`,`extensions`.`extension`.`last_modified` AS `last_modified` from (((select `extensions`.`extension`.`extid` AS `extid`,max(`extensions`.`extension`.`date`) AS `date` from `extensions`.`extension` where !((`extensions`.`extension`.`extid`,`extensions`.`extension`.`date`) in (select `extensions`.`extension`.`extid`,max(`extensions`.`extension`.`date`) AS `date` from `extensions`.`extension` group by `extensions`.`extension`.`extid`)) group by `extensions`.`extension`.`extid`)) `e1` join `extensions`.`extension` on(`e1`.`extid` = `extensions`.`extension`.`extid` and `e1`.`date` = `extensions`.`extension`.`date`)) */;
/*!50001 SET character_set_client = @saved_cs_client */;
/*!50001 SET character_set_results = @saved_cs_results */;
/*!50001 SET collation_connection = @saved_col_connection */;

View File

@ -30,7 +30,7 @@ create function until_date returns datetime NO SQL DEERMINISTIC return @until_da
/*!50001 SET collation_connection = utf8_general_ci */;
/*!50001 CREATE ALGORITHM=UNDEFINED */
/*!50013 DEFINER=`root`@`localhost` SQL SECURITY DEFINER */
/*!50001 VIEW `extension_second_most_recent_until_date` AS select `e1`.`extid` AS `extid`,`e1`.`date` AS `date`,`extensions`.`extension`.`name` AS `name`,`extensions`.`extension`.`version` AS `version`,`extensions`.`extension`.`description` AS `description`,`extensions`.`extension`.`downloads` AS `downloads`,`extensions`.`extension`.`rating` AS `rating`,`extensions`.`extension`.`ratingcount` AS `ratingcount`,`extensions`.`extension`.`fulldescription` AS `fulldescription`,`extensions`.`extension`.`developer` AS `developer`,`extensions`.`extension`.`itemcategory` AS `itemcategory`,`extensions`.`extension`.`crx_etag` AS `crx_etag`,`extensions`.`extension`.`lastupdated` AS `lastupdated`,`extensions`.`extension`.`last_modified` AS `last_modified` from (((select `extensions`.`extension`.`extid` AS `extid`,max(`extensions`.`extension`.`date`) AS `date` from `extensions`.`extension` where `extensions`.`extension`.`date` <= `until_date`() and !((`extensions`.`extension`.`extid`,`extensions`.`extension`.`date`) in (select `extensions`.`extension`.`extid`,max(`extensions`.`extension`.`date`) AS `date` from `extensions`.`extension` where `extensions`.`extension`.`date` <= `until_date`() group by `extensions`.`extension`.`extid`)) group by `extensions`.`extension`.`extid`)) `e1` join `extensions`.`extension` on(`e1`.`extid` = `extensions`.`extension`.`extid` and `e1`.`date` = `extensions`.`extension`.`date`)) */;
/*!50001 VIEW `extension_second_most_recent_until_date` AS select `e1`.`extid` AS `extid`,`e1`.`date` AS `date`,`extensions`.`extension`.`name` AS `name`,`extensions`.`extension`.`version` AS `version`,`extensions`.`extension`.`description` AS `description`,`extensions`.`extension`.`downloads` AS `downloads`,`extensions`.`extension`.`rating` AS `rating`,`extensions`.`extension`.`ratingcount` AS `ratingcount`,`extensions`.`extension`.`fulldescription` AS `fulldescription`,`extensions`.`extension`.`offeredby` AS `offeredby`,`extensions`.`extension`.`developer` AS `developer`, `extensions`.`extension`.`itemcategory` AS `itemcategory`,`extensions`.`extension`.`crx_etag` AS `crx_etag`,`extensions`.`extension`.`lastupdated` AS `lastupdated`,`extensions`.`extension`.`last_modified` AS `last_modified` from (((select `extensions`.`extension`.`extid` AS `extid`,max(`extensions`.`extension`.`date`) AS `date` from `extensions`.`extension` where `extensions`.`extension`.`date` <= `until_date`() and !((`extensions`.`extension`.`extid`,`extensions`.`extension`.`date`) in (select `extensions`.`extension`.`extid`,max(`extensions`.`extension`.`date`) AS `date` from `extensions`.`extension` where `extensions`.`extension`.`date` <= `until_date`() group by `extensions`.`extension`.`extid`)) group by `extensions`.`extension`.`extid`)) `e1` join `extensions`.`extension` on(`e1`.`extid` = `extensions`.`extension`.`extid` and `e1`.`date` = `extensions`.`extension`.`date`)) */;
/*!50001 SET character_set_client = @saved_cs_client */;
/*!50001 SET character_set_results = @saved_cs_results */;
/*!50001 SET collation_connection = @saved_col_connection */;

View File

@ -27,7 +27,7 @@
/*!50001 SET collation_connection = utf8_general_ci */;
/*!50001 CREATE ALGORITHM=UNDEFINED */
/*!50013 DEFINER=`writer`@`%` SQL SECURITY DEFINER */
/*!50001 VIEW `extension_small` AS select `extension`.`extid` AS `extid`,`extension`.`date` AS `date`,`extension`.`name` AS `name`,`extension`.`version` AS `version`,`extension`.`description` AS `description`,`extension`.`downloads` AS `downloads`,`extension`.`rating` AS `rating`,`extension`.`ratingcount` AS `ratingcount`,`extension`.`fulldescription` AS `fulldescription`,`extension`.`developer` AS `developer`,`extension`.`itemcategory` AS `itemcategory`,`extension`.`crx_etag` AS `crx_etag`,`extension`.`lastupdated` AS `lastupdated` from `extension` where `extension`.`extid` like 'aa%' */;
/*!50001 VIEW `extension_small` AS select `extension`.`extid` AS `extid`,`extension`.`date` AS `date`,`extension`.`name` AS `name`,`extension`.`version` AS `version`,`extension`.`description` AS `description`,`extension`.`downloads` AS `downloads`,`extension`.`rating` AS `rating`,`extension`.`ratingcount` AS `ratingcount`,`extension`.`fulldescription` AS `fulldescription`,`extensions`.`extension`.`offeredby` AS `offeredby`, `extension`.`developer` AS `developer`,`extension`.`itemcategory` AS `itemcategory`,`extension`.`crx_etag` AS `crx_etag`,`extension`.`lastupdated` AS `lastupdated` from `extension` where `extension`.`extid` like 'aa%' */;
/*!50001 SET character_set_client = @saved_cs_client */;
/*!50001 SET character_set_results = @saved_cs_results */;
/*!50001 SET collation_connection = @saved_col_connection */;