image download functionality working
Browse files- ebird_taxonomy_v2023.csv +0 -0
- fetch_img.py +65 -1
ebird_taxonomy_v2023.csv
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
fetch_img.py
CHANGED
|
@@ -2,4 +2,68 @@ import os
|
|
| 2 |
import requests
|
| 3 |
from bs4 import BeautifulSoup
|
| 4 |
from urllib.parse import urljoin
|
| 5 |
-
import urllib.request
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2 |
import requests
|
| 3 |
from bs4 import BeautifulSoup
|
| 4 |
from urllib.parse import urljoin
|
| 5 |
+
import urllib.request
|
| 6 |
+
import pandas as pd
|
| 7 |
+
import warnings
|
| 8 |
+
from requests.packages.urllib3.exceptions import InsecureRequestWarning
|
| 9 |
+
|
| 10 |
+
# Silence urllib3's InsecureRequestWarning, which would otherwise be emitted
# because the requests calls in this file pass verify=False.
warnings.simplefilter('ignore', InsecureRequestWarning)
|
| 11 |
+
|
| 12 |
+
# Template for requests to the eBird taxonomy endpoint: target URL, query
# parameters and headers.
REQ_FMT = {
    "url": 'https://api.ebird.org/v2/ref/taxonomy/ebird',
    "params": {
        # Placeholder; the real species code is supplied per request.
        'species': 'CHANGE THIS TO SPECIES CODE',
    },
    "headers": {
        # Prefer a token from the EBIRD_API_TOKEN environment variable so the
        # credential does not have to live in source control. The literal is
        # kept only as a backward-compatible fallback.
        # NOTE(review): this token has been committed; consider rotating it.
        'X-eBirdApiToken': os.environ.get('EBIRD_API_TOKEN') or 'id1a0e3q2lt3',
    },
}
|
| 21 |
+
# Taxonomy table, loaded once at import time. The path is relative to the
# current working directory. The SCI_NAME and SPECIES_CODE columns are read
# by scientific_to_species_code.
bird_df = pd.read_csv("ebird_taxonomy_v2023.csv")
|
| 22 |
+
|
| 23 |
+
|
| 24 |
+
def scientific_to_species_code(scientific_name: str, taxonomy=None):
    """Look up the species code for a scientific name.

    Args:
        scientific_name: Value to match exactly against the ``SCI_NAME`` column.
        taxonomy: Optional DataFrame with ``SCI_NAME`` and ``SPECIES_CODE``
            columns. Defaults to the module-level ``bird_df`` table.

    Returns:
        The ``SPECIES_CODE`` value of the first matching row.

    Raises:
        IndexError: If no row has the given scientific name.
    """
    if taxonomy is None:
        taxonomy = bird_df

    # Single .loc call with a row mask and a column label avoids chained indexing.
    matches = taxonomy.loc[taxonomy['SCI_NAME'] == scientific_name, 'SPECIES_CODE']
    if matches.empty:
        # Same exception type as the original `.array[0]` on an empty result,
        # but with a message that names the missing species.
        raise IndexError(
            f"No species code found for scientific name {scientific_name!r}"
        )
    return matches.iloc[0]
|
| 27 |
+
|
| 28 |
+
# Fetches taxonomic info for a bird from the eBird API. (Only used by the demo under __main__, which prints the result.)
|
| 29 |
+
def get_bird_info(species_code: str):
    """Request the eBird taxonomy entry for one species.

    Args:
        species_code: Species code sent as the ``species`` query parameter.

    Returns:
        The raw response body (``response.content``). The HTTP status code
        is not inspected.
    """
    # Build the query parameters per call instead of overwriting the shared
    # REQ_FMT['params'] entry, so one call cannot leak state into another.
    params = {"species": species_code}

    # NOTE(review): verify=False disables TLS certificate verification. It is
    # kept to match existing behavior, but should be removed if the
    # environment's certificates allow it.
    response = requests.get(
        REQ_FMT["url"],
        headers=REQ_FMT["headers"],
        params=params,
        verify=False,
        timeout=30,  # do not hang forever on an unresponsive server
    )
    return response.content
|
| 35 |
+
|
| 36 |
+
def download_images(url, folder_path='assets'):
    """Download the asset images referenced by a web page.

    The page at ``url`` is fetched and parsed. Every ``<img>`` tag with a
    ``src`` attribute is resolved against ``url``, and only resolved URLs
    containing both ``"api"`` and ``"asset"`` are kept. Each is saved as
    ``image_<n>.jpg`` (numbered from 1) inside ``folder_path``.

    Args:
        url: Address of the page to scan for images.
        folder_path: Directory to save images into; created if missing.

    Returns:
        None. Progress and failures are reported with ``print``. If the page
        does not return HTTP 200, nothing is downloaded.
    """
    # exist_ok=True avoids the check-then-create race of exists() + makedirs().
    os.makedirs(folder_path, exist_ok=True)

    # NOTE(review): verify=False disables TLS certificate verification. It is
    # kept to match existing behavior.
    response = requests.get(url, verify=False, timeout=30)
    if response.status_code != 200:
        print(f"Failed to retrieve the page. Status code: {response.status_code}")
        return

    soup = BeautifulSoup(response.content, 'html.parser')

    # Resolve relative src values against the page URL, then keep only the
    # URLs that look like asset API links.
    img_urls = [
        urljoin(url, img['src'])
        for img in soup.find_all('img')
        if 'src' in img.attrs
    ]
    img_urls = [el for el in img_urls if "api" in el and "asset" in el]

    for i, img_url in enumerate(img_urls, start=1):
        img_path = os.path.join(folder_path, f'image_{i}.jpg')
        try:
            urllib.request.urlretrieve(img_url, img_path)
        except Exception as e:
            # Deliberate best-effort boundary: one failed image must not stop
            # the remaining downloads.
            print(f"Failed to download {img_url}. Error: {e}")
        else:
            print(f"Downloaded: {img_url}")
|
| 64 |
+
|
| 65 |
+
if __name__ == '__main__':
    # Demo run: resolve a scientific name to its species code, print the
    # taxonomy response for it, then download the images from its species page.
    # The taxonomy CSV is already loaded at module level as bird_df, so the
    # redundant second read into an unused variable was dropped.
    scode = scientific_to_species_code("Melanocharis striativentris")
    print(get_bird_info(scode))
    download_images(f"https://ebird.org/species/{scode}")
|