From c33a869272d8bd750e0dd4e737bc2e2951a9e813 Mon Sep 17 00:00:00 2001 From: Siddhant Sharma Date: Thu, 16 Oct 2025 22:38:57 +0530 Subject: [PATCH] bugfix: fix JSONDecodeError on instance creation by updating urls and headers. --- .gitignore | 1 + espncricinfo/match.py | 13 +++++++++--- espncricinfo/player.py | 44 ++++++++++++++++++++++++++--------------- espncricinfo/series.py | 9 ++++++++- espncricinfo/summary.py | 11 +++++++++-- tests_new.py | 21 ++++++++++++++++++++ 6 files changed, 77 insertions(+), 22 deletions(-) create mode 100644 tests_new.py diff --git a/.gitignore b/.gitignore index cc7188c..a3faf97 100644 --- a/.gitignore +++ b/.gitignore @@ -3,3 +3,4 @@ *.swp test_settings.py cricinfo/** +*.egg-info/ diff --git a/espncricinfo/match.py b/espncricinfo/match.py index e96bc8a..18e4bb0 100644 --- a/espncricinfo/match.py +++ b/espncricinfo/match.py @@ -7,9 +7,16 @@ class Match(object): def __init__(self, match_id): self.match_id = match_id - self.match_url = "https://www.espncricinfo.com/matches/engine/match/{0}.html".format(str(match_id)) - self.json_url = "https://www.espncricinfo.com/matches/engine/match/{0}.json".format(str(match_id)) - self.headers = {'user-agent': 'Mozilla/5.0'} + self.match_url = f"https://www.espncricinfo.com/ci/engine/match/{match_id}.html" + self.json_url = f"https://www.espncricinfo.com/ci/engine/match/{match_id}.json" + self.headers = { + "User-Agent": ( + "Mozilla/5.0 (X11; Linux x86_64; rv:143.0)" + "Gecko/20100101 Firefox/143.0" + ), + "Accept-Language": "en-US,en;q=0.9", + "Connection": "keep-alive" + } self.json = self.get_json() self.html = self.get_html() self.comms_json = self.get_comms_json() diff --git a/espncricinfo/player.py b/espncricinfo/player.py index 6b33b7e..ce46ceb 100644 --- a/espncricinfo/player.py +++ b/espncricinfo/player.py @@ -11,11 +11,19 @@ def __init__(self, player_id): self.player_id=player_id self.url = "https://www.espncricinfo.com/player/player-name-{0}".format(str(player_id)) self.json_url = "http://core.espnuk.org/v2/sports/cricket/athletes/{0}".format(str(player_id)) - self.new_json_url = "https://hs-consumer-api.espncricinfo.com/v1/pages/player/home?playerId={0}".format(str(player_id)) - self.headers = {'user-agent': 'Mozilla/5.0'} - self.parsed_html = self.get_html() - self.json = self.get_json() - self.new_json = self.get_new_json() + # new_json_url isn't accessible + # self.new_json_url = "https://hs-consumer-api.espncricinfo.com/v1/pages/player/home?playerId={0}".format(str(player_id)) + self.headers = { + "User-Agent": ( + "Mozilla/5.0 (X11; Linux x86_64; rv:143.0)" + "Gecko/20100101 Firefox/143.0" + ), + "Accept-Language": "en-US,en;q=0.9", + "Connection": "keep-alive" + } + self.parsed_html = self.get_html() + self.json = self.get_json() + # self.new_json = self.get_new_json() self.cricinfo_id = str(player_id) self.__unicode__ = self._full_name() self.name = self._name() @@ -26,7 +34,8 @@ def __init__(self, player_id): self.playing_role = self._playing_role() self.batting_style = self._batting_style() self.bowling_style = self._bowling_style() - self.major_teams = self._major_teams() + # new_json_url isn't accessible + # self.major_teams = self._major_teams() def get_html(self): r = requests.get(self.url, headers=self.headers) @@ -41,13 +50,15 @@ def get_json(self): raise PlayerNotFoundError else: return r.json() - - def get_new_json(self): - r = requests.get(self.new_json_url, headers=self.headers) - if r.status_code == 404: - raise PlayerNotFoundError - else: - return r.json() + + + # new_json_url isn't accessible + # def get_new_json(self): + # r = requests.get(self.new_json_url, headers=self.headers) + # if r.status_code == 404: + # raise PlayerNotFoundError + # else: + # return r.json() def _name(self): return self.json['name'] @@ -70,8 +81,9 @@ def _date_of_birth(self): def _current_age(self): return self.json['age'] - def _major_teams(self): - return [x['team']['longName'] for x in self.new_json['content']['teams']] +# new_json_url isn't accessible +# def _major_teams(self): +# return [x['team']['longName'] for x in self.new_json['content']['teams']] def _playing_role(self): return self.json['position'] @@ -203,4 +215,4 @@ def get_data(self, file_name=None, match_format=11, data_type='allround', view=' with open(self.file_name, "w") as csv_file: writer = csv.writer(csv_file, delimiter=',') for row in scores: - writer.writerow(row.splitlines()) \ No newline at end of file + writer.writerow(row.splitlines()) diff --git a/espncricinfo/series.py b/espncricinfo/series.py index 7999d5e..37dfb8d 100644 --- a/espncricinfo/series.py +++ b/espncricinfo/series.py @@ -9,7 +9,14 @@ def __init__(self, series_id): self.json_url = "http://core.espnuk.org/v2/sports/cricket/leagues/{0}/".format(str(series_id)) self.events_url = "http://core.espnuk.org/v2/sports/cricket/leagues/{0}/events".format(str(series_id)) self.seasons_url = "http://core.espnuk.org/v2/sports/cricket/leagues/{0}/seasons".format(str(series_id)) - self.headers = {'user-agent': 'Mozilla/5.0'} + self.headers = { + "User-Agent": ( + "Mozilla/5.0 (X11; Linux x86_64; rv:143.0)" + "Gecko/20100101 Firefox/143.0" + ), + "Accept-Language": "en-US,en;q=0.9", + "Connection": "keep-alive" + } self.json = self.get_json(self.json_url) self.seasons = self._get_seasons() self.years = self._get_years_from_seasons() diff --git a/espncricinfo/summary.py b/espncricinfo/summary.py index eca242f..8eaae31 100644 --- a/espncricinfo/summary.py +++ b/espncricinfo/summary.py @@ -6,7 +6,14 @@ class Summary(object): def __init__(self): self.url = "http://static.cricinfo.com/rss/livescores.xml" - self.headers = {'user-agent': 'Mozilla/5.0'} + self.headers = { + "User-Agent": ( + "Mozilla/5.0 (X11; Linux x86_64; rv:143.0)" + "Gecko/20100101 Firefox/143.0" + ), + "Accept-Language": "en-US,en;q=0.9", + "Connection": "keep-alive" + } self.xml = self.get_xml() self.match_ids = self._match_ids() self.matches = self._build_matches() @@ -19,7 +26,7 @@ def get_xml(self): return BeautifulSoup(r.text, 'xml') def _match_ids(self): - matches = [x.link.text.split(".html")[0].split('/')[6] for x in self.xml.findAll('item')] + matches = [x.link.text.split(".html")[0].split('/')[6] for x in self.xml.find_all('item')] return matches def _build_matches(self): diff --git a/tests_new.py b/tests_new.py new file mode 100644 index 0000000..3f8a765 --- /dev/null +++ b/tests_new.py @@ -0,0 +1,21 @@ +from espncricinfo.match import Match +from espncricinfo.player import Player +from espncricinfo.series import Series +from espncricinfo.summary import Summary + +class TestObjects: + def test_match(self): + match = Match('1490425') + assert match.home_team == "IND-W" + + def test_player(self): + player = Player('883405') + assert player.name == "Jemimah Rodrigues" + + def test_series(self): + series = Series('1478193') + assert series.short_name == "ICC Women's World Cup" + + def test_summary(self): + summary = Summary() + assert "1490429" in summary.match_ids