Commit c15ee33

updated
1 parent 51d23ed commit c15ee33

File tree

8 files changed: +78 -209 lines changed

.idea/dputils.iml

+3-2
(Generated file; diff not rendered.)

.idea/misc.xml

+4
(Generated file; diff not rendered.)

docs/scraper.md

+1-1
@@ -150,7 +150,7 @@ def extract(dom_item, tags, data, errors):
 Here's a complete example of using the `scraper` module to extract data from a webpage:
 
 ```python
-from dputils.scraper import Scraper, Tag
+from dputils.scrape import Scraper, Tag
 
 url = "https://www.example.com"
 scraper = Scraper(url)
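
For context, here is a hedged sketch of how that documentation example might continue past the lines shown in the hunk, based on the `Tag` fields and the `get_data_from_page` call exercised in `tests/test.py` later in this commit; the selectors (`h1`/`title`, `span`/`price`) mirror the test fixtures and are illustrative, not part of the docs file itself.

```python
# Sketch only: assumes the Scraper/Tag API shown in tests/test.py.
from dputils.scrape import Scraper, Tag

url = "https://www.example.com"
scraper = Scraper(url)

# Describe which elements to pull from the page (illustrative selectors).
title_tag = Tag(name='h1', cls='title', output='text')
price_tag = Tag(name='span', cls='price', output='text')

# Keyword argument names become keys in the returned dict.
data = scraper.get_data_from_page(title=title_tag, price=price_tag)
print(data)  # e.g. {'title': ..., 'price': ...}
```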

poetry.lock

+1-162
(Generated file; diff not rendered.)

pyproject.toml

-3
@@ -17,12 +17,9 @@ docx2txt = "^0.8"
 "pdfminer.six" = "^20220524"
 fpdf2 = "^2.5.4"
 bs4 = "^0.0.1"
-requests = "^2.27.1"
 python-docx = "^0.8.11"
 httpx = {extras = ["http2"], version = "^0.25.1"}
 
-[tool.poetry.dev-dependencies]
-pytest = "^5.2"
 
 [build-system]
 requires = ["poetry-core>=1.0.0"]

setup.py

+25
@@ -0,0 +1,25 @@
+from setuptools import setup, find_packages
+
+setup(
+    name='dputils',
+    version='1.0.1',
+    description='This library is utility library from digipodium',
+    author='Team Digipodium, Zaid Kamil, AkulS1008',
+    author_email='[email protected]',
+    url='https://github.com/digipodium/dputils',
+    packages=find_packages(),
+    install_requires=[
+        'docx2txt>=0.8',
+        'pdfminer.six>=20220524',
+        'fpdf2>=2.5.4',
+        'bs4>=0.0.1',
+        'python-docx>=0.8.11',
+        'httpx[http2]>=0.25.1',
+    ],
+    classifiers=[
+        'Development Status :: 5 - Production/Stable',
+        'Intended Audience :: Developers',
+        'License :: OSI Approved :: MIT License',
+        'Programming Language :: Python :: 3',
+    ],
+)

test.py

-41
This file was deleted.

tests/test.py

+44
@@ -0,0 +1,44 @@
+import pytest
+from dputils.scrape import Scraper, Tag
+
+class TestScrapeModule:
+    @pytest.fixture
+    def scraper(self):
+        url = "https://www.example.com"
+        return Scraper(url)
+
+    @pytest.fixture
+    def title_tag(self):
+        return Tag(name='h1', cls='title', output='text')
+
+    @pytest.fixture
+    def price_tag(self):
+        return Tag(name='span', cls='price', output='text')
+
+    def test_get_data_from_page(self, scraper, title_tag, price_tag):
+        data = scraper.get_data_from_page(title=title_tag, price=price_tag)
+        assert isinstance(data, dict)
+        assert 'title' in data
+        assert 'price' in data
+
+    def test_get_repeating_data_from_page(self, scraper):
+        target_tag = Tag(name='div', cls='product-list')
+        item_tag = Tag(name='div', cls='product-item')
+        title_tag = Tag(name='h2', cls='product-title', output='text')
+        price_tag = Tag(name='span', cls='product-price', output='text')
+        link_tag = Tag(name='a', cls='product-link', output='href')
+
+        products = scraper.get_repeating_data_from_page(
+            target=target_tag,
+            items=item_tag,
+            title=title_tag,
+            price=price_tag,
+            link=link_tag
+        )
+
+        assert isinstance(products, list)
+        for product in products:
+            assert isinstance(product, dict)
+            assert 'title' in product
+            assert 'price' in product
+            assert 'link' in product
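
If it is convenient to run the new test module from Python rather than the command line, pytest's standard programmatic entry point can be used; the sketch below assumes the tests/ layout added in this commit and is equivalent to running `pytest tests/test.py -v` from the repository root.

```python
# Sketch: run the scraper tests programmatically via pytest's public API.
import sys
import pytest

if __name__ == "__main__":
    # pytest.main returns an exit code suitable for sys.exit.
    sys.exit(pytest.main(["tests/test.py", "-v"]))
```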
