Commit c9e971e6 authored by Daniel Niecke

change to newspaper lib

parent afa8bee2
......@@ -37,9 +37,10 @@ COPY /pw /pw
RUN chmod 666 /pw
EXPOSE 80
EXPOSE 443
CMD /usr/sbin/apache2ctl -D FOREGROUND
# run with
# docker build -t news-reader .
# docker rm -f news-reader
# docker run --hostname news.niecke-it.de -e "LETSENCRYPT_HOST=news.niecke-it.de" -e "LETSENCRYPT_EMAIL=niecke@bwl.uni-kiel.de" -e "VIRTUAL_HOST=news.niecke-it.de" -d -p 80:80 -p 443:443 -e ENV=production --name news-reader news-reader
# docker run -d -e ENV=production -p 7777:80 --name news-reader news-reader
# Start the "news-reader" image as a detached container named "news-reader"
# attached to the "main_net" Docker network, with the container hostname
# news.niecke-it.de. Ports 80 and 443 are only exposed (--expose); nothing is
# published to the host with -p here.
# NOTE(review): LETSENCRYPT_HOST / LETSENCRYPT_EMAIL / VIRTUAL_HOST are
# presumably consumed by a reverse-proxy / certificate companion container
# on main_net -- confirm against the rest of the deployment.
docker run -d \
--name news-reader \
--net main_net \
--hostname news.niecke-it.de \
--expose 80 \
--expose 443 \
-e ENV=production \
-e "LETSENCRYPT_HOST=news.niecke-it.de" \
-e "LETSENCRYPT_EMAIL=niecke@bwl.uni-kiel.de" \
-e "VIRTUAL_HOST=news.niecke-it.de" \
news-reader
\ No newline at end of file
import random
import string
from xhtml2pdf import pisa
def generate_pdf(pdf_string, outputFilename):
    """Render an HTML string to a PDF file with xhtml2pdf.

    Args:
        pdf_string: HTML markup to convert.
        outputFilename: Path of the PDF to create; the file is opened in
            "w+b" mode, so an existing file is truncated.

    Returns:
        True when the conversion reported no errors, False otherwise.
    """
    pisa.showLogging()
    # open output file for writing (truncated binary)
    with open(outputFilename, "w+b") as out:
        # convert HTML to PDF
        pisaStatus = pisa.CreatePDF(pdf_string, dest=out)
    print(pisaStatus)
    # The status object's err attribute is falsy when nothing went wrong, so
    # negate it to give callers the success flag the old comment promised.
    return not pisaStatus.err
# -*- coding: utf-8 -*-
"""
Install:
- pip install xhtml2pdf
"""
import random
import string
import requests
from bs4 import BeautifulSoup
import re
from send_to_kindle import send_to_kindle
from generate_pdf import generate_pdf
import os
from newspaper import Article, fulltext
"""
import nltk
nltk.download('punkt')
"""
def process_one_url(url):
    """Fetch one article, convert it to a PDF and mail it to the Kindle.

    The article is downloaded and parsed with ``Article`` (keeping its HTML),
    wrapped in a minimal HTML document, written to a randomly named file
    under /tmp via ``generate_pdf`` and handed to ``send_to_kindle``. The
    temporary PDF is removed afterwards, even when a step fails.

    Args:
        url: Address of the article to process.
    """
    article = Article(url, keep_article_html=True)
    article.download()
    article.parse()

    # Close <body> and <html> so the converter receives a complete document.
    pdf_string = (
        "<html><head><title>{}</title></head><body><h1>{}</h1>\n{}\n</body></html>"
    ).format(article.title, article.title, article.article_html)

    # A random suffix keeps concurrent requests from sharing one temp file.
    ran = ''.join(random.choice(string.ascii_letters + string.digits) for _ in range(32))
    outputFilename = "/tmp/tmp_{}.pdf".format(ran)

    try:
        generate_pdf(pdf_string, outputFilename)
        print("Done.")
        send_to_kindle(article.title, outputFilename)
        # Report the article title, not the whole HTML document.
        print("Done with {}".format(article.title))
    finally:
        # Always clean up, otherwise failed runs pile up PDFs in /tmp.
        if os.path.exists(outputFilename):
            os.remove(outputFilename)
if __name__ == '__main__':
    # Script entry point: process a single hard-coded article.
    # Disabled batch mode that would read one URL per line from input.txt:
    # with open('input.txt') as input_file:
    #     for line in input_file:
    #         process_one_url(line)
    sample_url = 'https://www.computerwoche.de/a/koennen-sie-software-defined-vehicle,3545653'
    process_one_url(sample_url)
\ No newline at end of file
......@@ -4,19 +4,13 @@
Install:
- pip install xhtml2pdf
"""
import smtplib
from email import encoders
from xhtml2pdf import pisa
import random
import string
import requests
from bs4 import BeautifulSoup
import re
from email.mime.multipart import MIMEMultipart
from email.mime.base import MIMEBase
import unidecode
import os
import logging
from send_to_kindle import send_to_kindle
from generate_pdf import generate_pdf
def process_one_url(url):
res = requests.get(url, stream=True)
......@@ -27,65 +21,23 @@ def process_one_url(url):
paragraphs = soup.findAll('p', attrs={'id': re.compile('gpar[0-9]*')})
images = soup.findAll('figure', attrs={'class': 'hero'})
print("Processing PDF file...")
pdf_string = "<html><head><title>{}</title></head><body><h1>{}</h1>\n".format(title, title)
for p in paragraphs:
pdf_string += str(p) + "\n"
#for i in images:
# pdf_string += str(i) + "\n"
pdf_string += "</body></html>"
ran = ''.join([random.choice(string.ascii_letters + string.digits) for n in range(32)])
# Define your data
outputFilename = "/tmp/tmp.pdf"
outputFilename = "/tmp/tmp_{}.pdf".format(ran)
pisa.showLogging()
# open output file for writing (truncated binary)
with open(outputFilename, "w+b") as out:
# convert HTML to PDF
pisaStatus = pisa.CreatePDF(pdf_string, dest=out)
# return True on success and False on errors
print(pisaStatus)
generate_pdf(pdf_string, outputFilename)
print("Done.")
print("Sending via mail...")
fromaddr = 'd.p.a.niecke@googlemail.com'
toaddr = 'd.p.a.niecke_e64c19@kindle.com'
msg = MIMEMultipart()
msg['Subject'] = 'convert'
msg['From'] = fromaddr
msg['To'] = toaddr
part = MIMEBase('application', "octet-stream")
part.set_payload(open("/tmp/tmp.pdf", "rb").read())
encoders.encode_base64(part)
title_pdf = unidecode.unidecode(title)
part.add_header('Content-Disposition', 'attachment; filename={}.pdf'.format(title_pdf))
msg.attach(part)
username = 'd.p.a.niecke@googlemail.com'
with open("/pw") as pw:
password = pw.readline()
server = smtplib.SMTP('smtp.gmail.com:587')
server.starttls()
server.login(username, password)
server.sendmail(fromaddr, toaddr, msg.as_string())
server.quit()
print(msg)
if os.path.exists("/tmp/tmp.pdf"):
os.remove("/tmp/tmp.pdf")
send_to_kindle(title, outputFilename)
print("Done with {}".format(title))
......
import smtplib
from email import encoders
from email.mime.multipart import MIMEMultipart
from email.mime.base import MIMEBase
import unidecode
import os
def send_to_kindle(title, filename):
    """Mail a file as an attachment to the hard-coded Kindle address.

    The message is sent through smtp.gmail.com:587 using STARTTLS; the
    account password is read from the first line of "/pw".

    Args:
        title: Title of the document; transliterated with ``unidecode`` and
            used (plus ".pdf") as the attachment's file name.
        filename: Path of the file to attach.

    Raises:
        OSError: If ``filename`` or "/pw" cannot be read.
        smtplib.SMTPException: If connecting, logging in or sending fails.
    """
    print("Sending via mail...")
    fromaddr = 'd.p.a.niecke@googlemail.com'
    toaddr = 'd.p.a.niecke_e64c19@kindle.com'

    msg = MIMEMultipart()
    msg['Subject'] = 'convert'
    msg['From'] = fromaddr
    msg['To'] = toaddr

    part = MIMEBase('application', "octet-stream")
    # Read the attachment through a context manager so the handle is closed.
    with open(filename, "rb") as attachment:
        part.set_payload(attachment.read())
    encoders.encode_base64(part)

    title_pdf = unidecode.unidecode(title)
    # Passing the file name as a header parameter lets the email package
    # quote it, so titles with spaces or semicolons stay a valid header.
    part.add_header('Content-Disposition', 'attachment',
                    filename='{}.pdf'.format(title_pdf))
    msg.attach(part)

    username = 'd.p.a.niecke@googlemail.com'
    with open("/pw") as pw:
        # readline() keeps the line terminator; it is not part of the password.
        password = pw.readline().rstrip("\r\n")

    # The context manager issues QUIT even when login or sending raises.
    with smtplib.SMTP('smtp.gmail.com:587') as server:
        server.starttls()
        server.login(username, password)
        server.sendmail(fromaddr, toaddr, msg.as_string())
    print(msg)
from flask import Flask, render_template, flash, request
from wtforms import Form, TextField, validators
from server import parse_golem
from server import parse_compu_wo
# App config.
DEBUG = True
......@@ -27,7 +27,7 @@ def save_url():
with open('/saved_urls.txt', 'a') as saved:
saved.write(url + "\n")
parse_golem.process_one_url(url)
parse_compu_wo.process_one_url(url)
else:
flash('All the form fields are required. ')
......@@ -38,7 +38,7 @@ def save_url():
@app.route("/app", methods=['GET'])
def save_from_app():
if request.args.get("url"):
parse_golem.process_one_url(request.args.get("url"))
parse_compu_wo.process_one_url(request.args.get("url"))
return '', 200
else:
return '', 400
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment