From b9ad8fe0ac18ff1351c4f26e0096ecb719f537ef Mon Sep 17 00:00:00 2001 From: prettysunflower Date: Wed, 21 May 2025 22:08:43 +0200 Subject: [PATCH] Extracting release dates from figures --- .idea/workspace.xml | 31 ++++++++++++++++++++--- src/hpoi/models/figure.py | 14 +++++++++++ src/hpoi/models/release.py | 48 ++++++++++++++++++++++++++++++++---- tests/test_data/figures.json | 33 +++++++++++++++++++++++-- tests/test_release.py | 46 ++++++++++++++++++++++++++++++++++ 5 files changed, 162 insertions(+), 10 deletions(-) create mode 100644 tests/test_release.py diff --git a/.idea/workspace.xml b/.idea/workspace.xml index 4ea9b21..2ea20c9 100644 --- a/.idea/workspace.xml +++ b/.idea/workspace.xml @@ -5,7 +5,11 @@ - + + + + + @@ -159,8 +183,9 @@ - + + \ No newline at end of file diff --git a/src/hpoi/models/figure.py b/src/hpoi/models/figure.py index 6f4402c..4aefba5 100644 --- a/src/hpoi/models/figure.py +++ b/src/hpoi/models/figure.py @@ -69,6 +69,20 @@ class Figure: if info_list_item_size: self.size = info_list_item_size.p.string.strip() + info_list_item_releases = info_list_items.get("发售") + if info_list_item_releases: + for x in info_list_item_releases.find_all("p"): + release = Release.from_string(x.get_text().strip()) + if release: + self.release.append(release) + + info_list_item_releases = info_list_items.get("出货日") + if info_list_item_releases: + for x in info_list_item_releases.find_all("p"): + release = Release.from_string(x.get_text().strip()) + if release: + self.release.append(release) + self.images = [ re.sub( r"^https://r\.hpoi\.net/gk/pic/s/(\d{4})/(\d{2})/([\w\d]{32})\.(\w+)(?:\?.*)?", diff --git a/src/hpoi/models/release.py b/src/hpoi/models/release.py index 81bc796..6b8323f 100644 --- a/src/hpoi/models/release.py +++ b/src/hpoi/models/release.py @@ -1,3 +1,4 @@ +import re from dataclasses import dataclass from typing import Optional, Dict @@ -6,8 +7,45 @@ from hpoi.models.currency import Currency @dataclass class Release: - year: Optional[int] - month: Optional[int] - day: Optional[int] - price: Optional[Dict[Currency, int]] - full_text: str \ No newline at end of file + full_text: str + year: Optional[int] = None + month: Optional[int] = None + day: Optional[int] = None + price: Optional[tuple[Currency, int]] = None + + @classmethod + def from_string(cls, text) -> Optional["Release"]: + if text == "未知": + return None + + self = cls(full_text=text) + + text = text.replace(",", "") + + full_date = re.search(r"(\d{4})/(\d{1,2})/(\d{1,2})", text) + if full_date: + self.year = int(full_date.group(1)) + self.month = int(full_date.group(2)) + self.day = int(full_date.group(3)) + else: + year_match = re.search(r"(\d{4})年", text) + if year_match: + self.year = int(year_match.group(1)) + + month_match = re.search(r"(\d{1,2})月", text) + if month_match: + self.month = int(month_match.group(1)) + + day_match = re.search(r"(\d{1,2})日", text) + if day_match: + self.day = int(day_match.group(1)) + + cny_regex = re.search(r"(\d+)人民币", text) + if cny_regex: + self.price = (Currency.CNY, int(cny_regex.group(1))) + + jpy_regex = re.search(r"(\d+)日元", text) + if jpy_regex: + self.price = (Currency.JPY, int(jpy_regex.group(1))) + + return self diff --git a/tests/test_data/figures.json b/tests/test_data/figures.json index 8abe4b9..e492a64 100644 --- a/tests/test_data/figures.json +++ b/tests/test_data/figures.json @@ -26,7 +26,28 @@ "JPY": 9400, "CNY": 465 }, - "release": [], + "release": [ + { + "full_text": "2023/2/15 , 9,400日元(税)", + "year": 2023, + "month": 2, + "day": 15, + "price": [ + "JPY", + 9400 + ] + }, + { + "full_text": "2022/8/6 , 9,400日元(税) , Smile Fest 2022 先行贩售", + "year": 2022, + "month": 8, + "day": 6, + "price": [ + "JPY", + 9400 + ] + } + ], "scale": null, "size": "H=100mm", "mfc_id": null, @@ -66,7 +87,15 @@ "value": { "CNY": 1999 }, - "release": [], + "release": [ + { + "full_text": "2022年10月", + "year": 2022, + "month": 10, + "day": null, + "price": null + } + ], "scale": "1/3", "size": "H=270mm L=335mm D=355mm", "mfc_id": null, diff --git a/tests/test_release.py b/tests/test_release.py new file mode 100644 index 0000000..16d96e9 --- /dev/null +++ b/tests/test_release.py @@ -0,0 +1,46 @@ +from hpoi.models.currency import Currency +from hpoi.models.release import Release + + +def test_unknown(): + assert Release.from_string("未知") is None + + +def test_chinese_year(): + release = Release.from_string("2025年") + assert release.year == 2025 + assert release.month is None + assert release.day is None + assert release.price is None + + +def test_chinese_year_month(): + release = Release.from_string("2022年10月") + assert release.year == 2022 + assert release.month == 10 + assert release.day is None + assert release.price is None + + +def test_chinese_full_date(): + release = Release.from_string("2025年5月28日") + assert release.year == 2025 + assert release.month == 5 + assert release.day == 28 + assert release.price is None + + +def test_full_date_japanese_yen(): + release = Release.from_string("2023/2/15 , 9,400日元(税)") + assert release.year == 2023 + assert release.month == 2 + assert release.day == 15 + assert release.price == (Currency.JPY, 9400) + + +def test_full_date_chinese_yuan(): + release = Release.from_string("2025/5/7 , 369人民币 , 预售优惠价") + assert release.year == 2025 + assert release.month == 5 + assert release.day == 7 + assert release.price == (Currency.CNY, 369)