Initial commit

This commit is contained in:
Kumi 2023-09-06 12:06:20 +02:00
commit 9e930bddbb
Signed by: kumi
GPG key ID: ECBCC9082395383F
9 changed files with 306 additions and 0 deletions

5
.gitignore vendored Normal file
View file

@ -0,0 +1,5 @@
venv/
*.pyc
__pycache__/
dist/
settings.ini

19
LICENSE Normal file
View file

@ -0,0 +1,19 @@
Copyright (c) 2023 Kumi Systems e.U. <office@kumi.systems>
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

19
README.md Normal file
View file

@ -0,0 +1,19 @@
# S3 Downloader
This is a simple tool to download files from S3. It is intended to be used as a CLI tool, but can also be used as a library.
## Requirements
* Python 3.10+
## Installation
```bash
python -m venv venv
source venv/bin/activate
pip install git+https://kumig.it/kumisystems/s3downloader.git
```
## License
This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.

29
pyproject.toml Normal file
View file

@ -0,0 +1,29 @@
[build-system]
requires = ["hatchling"]
build-backend = "hatchling.build"
[project]
name = "s3downloader"
version = "0.1.0"
authors = [
{ name="Kumi Mitterer", email="s3downloader@kumi.email" },
]
description = "Simple Python CLI tool to download files from S3"
readme = "README.md"
license = { file="LICENSE" }
requires-python = ">=3.10"
classifiers = [
"Programming Language :: Python :: 3",
"License :: OSI Approved :: MIT License",
"Operating System :: OS Independent",
]
dependencies = [
"boto3",
]
[project.urls]
"Homepage" = "https://kumig.it/kumitterer/s3downloader"
"Bug Tracker" = "https://kumig.it/kumitterer/s3downloader/issues"
[project.scripts]
s3downloader = "s3downloader.__main__:main"

24
settings.dist.ini Normal file
View file

@ -0,0 +1,24 @@
[S3]
# The credentials to your S3 bucket
access_key = your-access-key
secret_key = your-secret-key
# The name of your S3 bucket
bucket_name = your-bucket
# The path where files are stored within that bucket
# Can be omitted if files are in the base directory
path = in/here/
# Where downloaded files will be stored
final_dir = /your/output/directory
# Delete files from bucket after downloading? (1 = yes, 0 = no)
delete = 1
# To add further arguments for boto3.Client, list them below:
#
# endpoint_url = https://my.minio.instance/
#
# See https://boto3.amazonaws.com/v1/documentation/api/latest/reference/core/session.html#boto3.session.Session.client

View file

View file

@ -0,0 +1,41 @@
from .classes.client import S3Client
from .classes.config import Config
import logging
from argparse import ArgumentParser
def main():
    """CLI entry point: parse arguments, configure logging, run the client.

    Reads S3 credentials and download settings from an INI file (default:
    ``settings.ini``), downloads all objects under the configured prefix,
    and — if the configuration says so — deletes them from the bucket
    after a fully successful run.
    """
    parser = ArgumentParser(description="Download files from an S3 bucket")
    parser.add_argument(
        "--config",
        type=str,
        default="settings.ini",
        help="Path to configuration file",
    )
    parser.add_argument(
        "--section",
        type=str,
        default="S3",
        help="Section in configuration file",
    )
    parser.add_argument(
        "--log",
        type=str,
        default="INFO",
        help="Logging level (e.g. DEBUG, INFO, WARNING)",
    )
    args = parser.parse_args()

    # Normalize so lowercase names like "info" work too; basicConfig raises
    # ValueError for unknown level names, which is the desired behavior.
    logging.basicConfig(level=args.log.upper())

    client = S3Client.from_config(args.config, args.section)
    client.process_files()


if __name__ == "__main__":
    main()

View file

@ -0,0 +1,118 @@
import logging
import tempfile
from os import PathLike
from pathlib import Path
from typing import IO, List

import boto3
from botocore.client import BaseClient
from botocore.exceptions import NoCredentialsError

from .config import Config
class S3Client:
    """Download files from an S3 bucket into a local directory.

    Wraps a boto3 S3 client bound to one bucket and key prefix, mirrors the
    matching objects into ``final_dir`` (skipping files already present),
    and can optionally delete the remote objects after a successful run.
    """

    def __init__(
        self,
        access_key: str,
        secret_key: str,
        bucket_name: str,
        path: str,
        final_dir: PathLike,
        delete: bool = False,
        **kwargs,
    ):
        """Connect to S3 and prepare the local output directory.

        :param access_key: AWS access key ID.
        :param secret_key: AWS secret access key.
        :param bucket_name: Name of the bucket to read from.
        :param path: Key prefix within the bucket ("" for the bucket root).
        :param final_dir: Local directory downloaded files are written to.
        :param delete: If True, remote objects are deleted after all
            downloads succeeded.
        :param kwargs: Extra keyword arguments forwarded to ``boto3.client``
            (e.g. ``endpoint_url`` for S3-compatible services).
        :raises NotADirectoryError: If ``final_dir`` exists but is not a
            directory.
        """
        self.s3 = self.connect(access_key, secret_key, **kwargs)
        self.bucket_name = bucket_name
        self.path = path
        self.final_dir = Path(final_dir)
        self.delete = delete

        self.final_dir.mkdir(exist_ok=True)
        # Raise a real exception instead of using assert: asserts are
        # stripped when Python runs with -O, silently skipping validation.
        if not self.final_dir.is_dir():
            raise NotADirectoryError(
                "Final directory does not exist or is not a directory"
            )

    @classmethod
    def from_config(cls, config_file: PathLike, section: str = "S3") -> "S3Client":
        """Build a client from a section of an INI configuration file."""
        config = Config(config_file, section)
        return cls(
            config.access_key,
            config.secret_key,
            config.bucket_name,
            config.path,
            config.final_dir,
            config.delete,
            **config.kwargs,
        )

    def connect(self, access_key: str, secret_key: str, **kwargs) -> BaseClient:
        """Create and return the underlying boto3 S3 client."""
        logging.debug("Connecting to S3")
        return boto3.client(
            "s3",
            aws_access_key_id=access_key,
            aws_secret_access_key=secret_key,
            **kwargs,
        )

    def list_files(self) -> List[str]:
        """Return the keys of all objects under the configured prefix.

        NOTE(review): ``list_objects`` returns at most 1000 keys per call;
        buckets holding more objects under the prefix would need pagination
        (e.g. ``list_objects_v2`` with a paginator) — confirm expected scale.
        """
        logging.debug("Listing files in S3")
        return [
            obj["Key"]
            for obj in self.s3.list_objects(
                Bucket=self.bucket_name, Prefix=self.path
            ).get("Contents", [])
        ]

    def download_files(self) -> bool:
        """Download every remote object not already present locally.

        :return: True if all downloads succeeded, False if any error occurred.
        """
        try:
            logging.debug("Downloading files")
            for key in self.list_files():
                if self._exists_local(key):
                    # logging.warning — logging.warn is deprecated.
                    logging.warning(f"File already exists locally, skipping: {key}")
                else:
                    self.download_file(key)
        except Exception:
            # Log with traceback instead of print() so the failure lands in
            # the configured log, then report failure to the caller.
            logging.exception("Error while downloading files")
            return False
        return True

    def download_file(self, filename: str) -> None:
        """Download one object into a temp file, then move it into place.

        Using a temporary file means a partially transferred object never
        appears in ``final_dir`` under its final name.
        """
        logging.info(f"Downloading file from S3: {filename}")
        with tempfile.TemporaryFile() as temp_file:
            self.s3.download_fileobj(self.bucket_name, filename, temp_file)
            temp_file.seek(0)
            self.move_file(temp_file, filename)

    def move_file(self, temp_file: IO[bytes], filename: str) -> None:
        """Copy the temp file's contents into ``final_dir``.

        The local file is named after the object's basename, so prefix
        directories inside the bucket are flattened away.
        """
        logging.debug(f"Moving file to final directory: {filename}")
        with open(self.final_dir / Path(filename).name, "wb") as final_file:
            final_file.write(temp_file.read())

    def delete_files(self) -> None:
        """Delete every object under the configured prefix from the bucket."""
        logging.debug("Deleting files from S3")
        for key in self.list_files():
            self.delete_file(key)

    def delete_file(self, filename: str) -> None:
        """Delete a single object from the bucket."""
        logging.info(f"Deleting file from S3: {filename}")
        self.s3.delete_object(Bucket=self.bucket_name, Key=filename)

    def process_files(self) -> None:
        """Download everything; if fully successful and ``delete`` is set,
        remove the remote copies afterwards."""
        logging.debug("Processing files")
        if self.download_files() and self.delete:
            self.delete_files()

    def _exists_local(self, filename: str) -> bool:
        """Return True if a file with the object's basename exists locally."""
        logging.debug(f"Checking if file exists locally: {filename}")
        return (self.final_dir / Path(filename).name).exists()

View file

@ -0,0 +1,51 @@
from configparser import ConfigParser
import logging
class Config:
    """Typed accessor around one section of an INI configuration file.

    Exposes the S3 settings consumed by ``S3Client`` as properties; any
    extra keys in the section are collected by :attr:`kwargs` and passed
    through to ``boto3.client`` (e.g. ``endpoint_url``).
    """

    # Keys consumed directly by S3Client; everything else in the section is
    # treated as a pass-through keyword argument for boto3.client().
    _KNOWN_KEYS = frozenset(
        {"access_key", "secret_key", "bucket_name", "path", "final_dir", "delete"}
    )

    def __init__(self, config_file=(), section="S3"):
        """Read the configuration file(s).

        :param config_file: Path, or iterable of paths, handed to
            ``ConfigParser.read`` (missing files are silently ignored).
            The default is an immutable empty tuple — never a mutable
            default argument.
        :param section: Name of the INI section holding the settings.
        """
        logging.debug(f"Reading configuration file(s): {config_file}")
        self.config = ConfigParser()
        self.config.read(config_file)
        self.section = section

    @property
    def access_key(self):
        """AWS access key ID (required)."""
        return self.config[self.section]["access_key"]

    @property
    def secret_key(self):
        """AWS secret access key (required)."""
        return self.config[self.section]["secret_key"]

    @property
    def bucket_name(self):
        """Name of the S3 bucket (required)."""
        return self.config[self.section]["bucket_name"]

    @property
    def path(self):
        """Key prefix within the bucket; empty string if not configured."""
        return self.config[self.section].get("path", "")

    @property
    def final_dir(self):
        """Local directory downloaded files are stored in (required)."""
        return self.config[self.section]["final_dir"]

    @property
    def delete(self):
        """Whether to delete remote objects after download.

        ``getboolean`` returns None when the key is absent.
        """
        return self.config[self.section].getboolean("delete")

    @property
    def kwargs(self):
        """All extra keys in the section, forwarded to ``boto3.client``."""
        return {
            key: value
            for key, value in self.config[self.section].items()
            if key not in self._KNOWN_KEYS
        }