Skip to content
This repository was archived by the owner on May 5, 2023. It is now read-only.

Commit 7c2f8a8

Browse files
committed
feat: update state APIs which have data from covidtracking
1 parent 808800c commit 7c2f8a8

File tree

10 files changed

+268
-85
lines changed

10 files changed

+268
-85
lines changed

django_covid19/admin.py

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -87,4 +87,15 @@ class CountryAdmin(BaseAdmin):
8787
)
8888
search_fields = (
8989
'continents', 'countryFullName', 'countryShortCode', 'countryName'
90-
)
90+
)
91+
92+
93+
@admin.register(models.State)
94+
class StateAdmin(BaseAdmin):
95+
96+
list_display = (
97+
'countryShortCode', 'stateName', 'state',
98+
'totalTestResults', 'positive', 'negative', 'pending',
99+
'hospitalized', 'death', 'recovered', 'dateModified'
100+
)
101+
search_fields = ('countryShortCode', 'stateName', 'state')
Lines changed: 131 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,131 @@
1+
# Generated by Django 2.2.12 on 2020-05-29 13:50
2+
3+
from django.db import migrations, models
4+
import django.db.models.deletion
5+
import django.utils.timezone
6+
7+
8+
class Migration(migrations.Migration):
9+
10+
initial = True
11+
12+
dependencies = [
13+
]
14+
15+
operations = [
16+
migrations.CreateModel(
17+
name='Country',
18+
fields=[
19+
('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
20+
('locationId', models.IntegerField()),
21+
('continents', models.CharField(max_length=50)),
22+
('countryShortCode', models.CharField(max_length=20)),
23+
('countryName', models.CharField(max_length=50)),
24+
('countryFullName', models.CharField(max_length=50)),
25+
('currentConfirmedCount', models.IntegerField(default=0)),
26+
('confirmedCount', models.IntegerField(default=0)),
27+
('suspectedCount', models.IntegerField(default=0)),
28+
('curedCount', models.IntegerField(default=0)),
29+
('deadCount', models.IntegerField(default=0)),
30+
('showRank', models.BooleanField(null=True)),
31+
('deadRateRank', models.IntegerField(null=True)),
32+
('deadCountRank', models.IntegerField(null=True)),
33+
('confirmedCountRank', models.FloatField(null=True)),
34+
('deadRate', models.FloatField(null=True)),
35+
('tags', models.CharField(max_length=200, null=True)),
36+
('statisticsData', models.CharField(max_length=500, null=True)),
37+
('comment', models.CharField(max_length=200, null=True)),
38+
('incrVo', models.TextField(null=True)),
39+
('sort', models.IntegerField(null=True)),
40+
('operator', models.CharField(max_length=50, null=True)),
41+
('dailyData', models.TextField()),
42+
('createTime', models.DateTimeField(auto_now_add=True, verbose_name='createTime')),
43+
('modifyTime', models.DateTimeField(auto_now=True, verbose_name='modifyTime')),
44+
],
45+
options={
46+
'verbose_name': 'Country',
47+
'verbose_name_plural': 'Country',
48+
},
49+
),
50+
migrations.CreateModel(
51+
name='Province',
52+
fields=[
53+
('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
54+
('locationId', models.IntegerField(verbose_name='locationId')),
55+
('provinceName', models.CharField(max_length=50, verbose_name='provinceName')),
56+
('provinceShortName', models.CharField(max_length=20, verbose_name='provinceShortName')),
57+
('currentConfirmedCount', models.IntegerField(default=0, verbose_name='currentConfirmedCount')),
58+
('confirmedCount', models.IntegerField(default=0, verbose_name='confirmedCount')),
59+
('suspectedCount', models.IntegerField(default=0, verbose_name='suspectedCount')),
60+
('curedCount', models.IntegerField(default=0, verbose_name='curedCount')),
61+
('deadCount', models.IntegerField(default=0, verbose_name='deadCount')),
62+
('comment', models.CharField(max_length=200, verbose_name='comment')),
63+
('statisticsData', models.CharField(max_length=500, verbose_name='statisticsData')),
64+
('dailyData', models.TextField(verbose_name='dailyData')),
65+
('createTime', models.DateTimeField(auto_now_add=True, verbose_name='createTime')),
66+
('modifyTime', models.DateTimeField(auto_now=True, verbose_name='modifyTime')),
67+
],
68+
options={
69+
'verbose_name': 'Province',
70+
'verbose_name_plural': 'Province',
71+
},
72+
),
73+
migrations.CreateModel(
74+
name='State',
75+
fields=[
76+
('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
77+
('stateId', models.CharField(max_length=10)),
78+
('countryShortCode', models.CharField(max_length=20)),
79+
('name', models.CharField(max_length=50)),
80+
],
81+
options={
82+
'verbose_name': 'State',
83+
'verbose_name_plural': 'State',
84+
},
85+
),
86+
migrations.CreateModel(
87+
name='Statistics',
88+
fields=[
89+
('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
90+
('globalStatistics', models.TextField(default='{}', verbose_name='globalStatistics')),
91+
('domesticStatistics', models.TextField(default='{}', verbose_name='domesticStatistics')),
92+
('internationalStatistics', models.TextField(default='{}', verbose_name='internationalStatistics')),
93+
('remarks', models.TextField(default='[]', verbose_name='remarks')),
94+
('notes', models.TextField(default='[]', verbose_name='notes')),
95+
('generalRemark', models.TextField(default='', verbose_name='generalRemark')),
96+
('WHOArticle', models.TextField(default='{}', verbose_name='WHOArticle')),
97+
('recommends', models.TextField(default='[]', verbose_name='recommends')),
98+
('timelines', models.TextField(default='[]', verbose_name='timelines')),
99+
('wikis', models.TextField(default='[]', verbose_name='Wiki')),
100+
('goodsGuides', models.TextField(default='[]', verbose_name='goodsGuides')),
101+
('rumors', models.TextField(default='[]', verbose_name='rumors')),
102+
('modifyTime', models.DateTimeField(null=True, verbose_name='modifyTime')),
103+
('createTime', models.DateTimeField(null=True, verbose_name='createTime')),
104+
('crawlTime', models.DateTimeField(default=django.utils.timezone.now, editable=False, verbose_name='crawlTime')),
105+
],
106+
options={
107+
'verbose_name': 'Statistics',
108+
'verbose_name_plural': 'Statistics',
109+
},
110+
),
111+
migrations.CreateModel(
112+
name='City',
113+
fields=[
114+
('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
115+
('locationId', models.IntegerField(verbose_name='locationId')),
116+
('cityName', models.CharField(max_length=50, verbose_name='cityName')),
117+
('currentConfirmedCount', models.IntegerField(default=0, verbose_name='currentConfirmedCount')),
118+
('confirmedCount', models.IntegerField(default=0, verbose_name='confirmedCount')),
119+
('suspectedCount', models.IntegerField(default=0, verbose_name='suspectedCount')),
120+
('curedCount', models.IntegerField(default=0, verbose_name='curedCount')),
121+
('deadCount', models.IntegerField(default=0, verbose_name='deadCount')),
122+
('createTime', models.DateTimeField(auto_now_add=True, verbose_name='createTime')),
123+
('modifyTime', models.DateTimeField(auto_now=True, verbose_name='modifyTime')),
124+
('province', models.ForeignKey(db_column='provinceId', on_delete=django.db.models.deletion.CASCADE, related_name='cities', to='django_covid19.Province', verbose_name='province')),
125+
],
126+
options={
127+
'verbose_name': 'City',
128+
'verbose_name_plural': 'City',
129+
},
130+
),
131+
]

django_covid19/models.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -112,10 +112,13 @@ class Meta:
112112
verbose_name = _('Country')
113113
verbose_name_plural = _('Country')
114114

115+
115116
class State(models.Model):
116117

117118
countryShortCode = models.CharField(max_length=20)
118119
stateName = models.CharField(max_length=50, null=False)
120+
currentUrl = models.URLField(max_length=200, null=True, blank=True)
121+
dailyUrl = models.URLField(max_length=200, null=True, blank=True)
119122
dailyData = models.TextField(default='[]') # save daily data here
120123

121124
# fields in covidtracking api

django_covid19/serializers.py

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -135,6 +135,7 @@ class Meta:
135135
'suspectedCount', 'curedCount', 'deadCount', 'incrVo'
136136
]
137137

138+
138139
class StateSerializer(serializers.ModelSerializer):
139140

140141
countryShortCode = serializers.CharField()
@@ -155,10 +156,17 @@ class Meta:
155156
fields = [
156157
'currentConfirmedCount', 'confirmedCount', 'curedCount',
157158
'deadCount', 'suspectedCount', 'stateName', 'state',
158-
'countryShortCode'
159+
'countryShortCode', 'dailyUrl', 'currentUrl'
159160
]
160161

161162

163+
class StateRawSerializer(serializers.ModelSerializer):
164+
165+
class Meta:
166+
model = models.State
167+
exclude = ('id', 'dailyData')
168+
169+
162170
class StateDailySerializer(serializers.Serializer):
163171

164172
state = serializers.CharField()

django_covid19/spider/nCoV/pipelines.py

Lines changed: 15 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,19 @@
1414

1515
from . import items
1616

17-
class CovidTrackingPipeline(object):
17+
class BasePipeline(object):
18+
19+
def open_spider(self, spider):
20+
spider.object_id = uuid4().hex
21+
cache.set('running_spider_id', spider.object_id)
22+
spider.crawled = 0
23+
24+
def close_spider(self, spider):
25+
cache.set('crawled', spider.crawled)
26+
cache.delete('running_spider_id')
27+
28+
29+
class CovidTrackingPipeline(BasePipeline):
1830

1931
def process_item(self, item, spider):
2032
if isinstance(item, items.StateItem):
@@ -26,12 +38,7 @@ def process_item(self, item, spider):
2638
return item
2739

2840

29-
class NcovPipeline(object):
30-
31-
def open_spider(self, spider):
32-
spider.object_id = uuid4().hex
33-
cache.set('running_spider_id', spider.object_id)
34-
spider.crawled = 0
41+
class NcovPipeline(BasePipeline):
3542

3643
def process_item(self, item, spider):
3744
if isinstance(item, items.CityItem):
@@ -58,8 +65,4 @@ def process_item(self, item, spider):
5865
klass.django_model.objects.create(**item)
5966
return item
6067
else:
61-
return item
62-
63-
def close_spider(self, spider):
64-
cache.set('crawled', spider.crawled)
65-
cache.delete('running_spider_id')
68+
return item

django_covid19/spider/nCoV/settings.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@
1919
USER_AGENT = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_3) AppleWebKit/536.5 (KHTML, like Gecko) Chrome/19.0.1084.54 Safari/536.5'
2020

2121
# Whether to obey robots.txt rules
22-
ROBOTSTXT_OBEY = True
22+
ROBOTSTXT_OBEY = False
2323

2424
# Configure maximum concurrent requests performed by Scrapy (default: 16)
2525
#CONCURRENT_REQUESTS = 32

django_covid19/spider/nCoV/spiders/covidtracking.py

Lines changed: 50 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
# @Author: zhanglei3
33
# @Date: 2020-04-08 09:08:13
44
# @Last Modified by: leafcoder
5-
# @Last Modified time: 2020-05-30 19:02:49
5+
# @Last Modified time: 2020-06-01 12:43:31
66

77
"""Data source for COVID-19 statistics of the US states."""
88

@@ -81,63 +81,77 @@
8181

8282
class CovidTrackingSpider(scrapy.Spider):
8383

84-
"""data source: https://covidtracking.com/api"""
84+
"""Data source: https://covidtracking.com/api
85+
86+
Covidtracking updates all of its data each day between 4pm and 5pm EDT.
87+
"""
8588

8689
name = "covidtracking"
8790
allowed_domains = ["covidtracking.com"]
91+
92+
# custom attributes
93+
daily_state_url_template = \
94+
'https://covidtracking.com/api/v1/states/%s/daily.json'
95+
current_state_url_template = \
96+
'https://covidtracking.com/api/v1/states/%s/current.json'
8897
country_short_code = 'USA'
8998
states = {}
9099

91100
def start_requests(self):
92-
apis = [
93-
'https://covidtracking.com/api/v1/states/current.json',
94-
'https://covidtracking.com/api/v1/states/daily.json',
95-
'https://covidtracking.com/api/v1/states/info.json',
96-
'https://covidtracking.com/api/v1/us/daily.json',
97-
]
101+
object_id = self.object_id
102+
spider_id = cache.get('running_spider_id')
103+
if object_id != spider_id:
104+
logger.info('Spider is running.')
105+
self.crawled = 0
106+
return
107+
98108
yield scrapy.Request(
99109
'https://covidtracking.com/api/v1/states/info.json',
100110
self.parse_info)
101111

102-
def parse_states_current(self, response):
103-
countryShortCode = self.country_short_code
112+
def parse_info(self, response):
113+
country_short_code = self.country_short_code
114+
states = self.states
115+
result = json.loads(response.text)
116+
for item in result:
117+
state = item['state']
118+
state_name = item['name']
119+
state_name = ''.join(state_name.split())
120+
states[state] = {
121+
'state': state,
122+
'countryShortCode': country_short_code,
123+
'stateName': state_name
124+
}
125+
yield scrapy.Request(
126+
'https://covidtracking.com/api/v1/states/current.json',
127+
self.parse_current_states)
128+
129+
def parse_current_states(self, response):
130+
country_short_code = self.country_short_code
104131
states = self.states
105132
result = json.loads(response.text)
106133
for item in result:
107134
state = item['state']
135+
daily_state_url = self.daily_state_url_template % state
136+
current_state_url = self.current_state_url_template % state
108137
state_item = states[state]
109138
state_item.update(item)
110139
state_item.pop('grade', None)
111140
state_item.pop('total', None)
112-
state_item['countryShortCode'] = countryShortCode
141+
state_item['countryShortCode'] = country_short_code
142+
state_item['currentUrl'] = current_state_url
143+
state_item['dailyUrl'] = daily_state_url
113144
yield scrapy.Request(
114-
'https://covidtracking.com/api/v1/states/%s/daily.json' \
115-
% state,
116-
self.parse_state_daily,
117-
meta={
118-
'state_item': state_item
119-
})
120-
121-
def parse_state_daily(self, response):
145+
daily_state_url,
146+
self.parse_daily_state,
147+
meta={'state_item': state_item})
148+
149+
self.crawled = 1 # marks that the spider has crawled data
150+
151+
def parse_daily_state(self, response):
122152
meta = response.meta
123153
state_item = meta['state_item']
124154
state_item['dailyData'] = json.dumps(
125155
json.loads(response.text)[::-1])
126-
yield items.StateItem(**state_item)
127156

128-
def parse_info(self, response):
129-
countryShortCode = self.country_short_code
130-
states = self.states
131-
result = json.loads(response.text)
132-
for item in result:
133-
state = item['state']
134-
stateName = item['name']
135-
stateName = ''.join(stateName.split())
136-
states[state] = {
137-
'state': state,
138-
'countryShortCode': countryShortCode,
139-
'stateName': stateName
140-
}
141-
yield scrapy.Request(
142-
'https://covidtracking.com/api/v1/states/current.json',
143-
self.parse_states_current)
157+
yield items.StateItem(**state_item)

0 commit comments

Comments
 (0)