Skip to content

Commit

Permalink
Implement split outputs #23
Browse files Browse the repository at this point in the history
  • Loading branch information
KnugiHK committed Jun 8, 2023
1 parent dbdfdae commit f63b180
Show file tree
Hide file tree
Showing 6 changed files with 184 additions and 68 deletions.
13 changes: 10 additions & 3 deletions Whatsapp_Chat_Exporter/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,9 +125,13 @@ def main():
parser.add_argument(
"--size",
"--output-size",
"--split",
dest="size",
nargs='?',
type=int,
const=0,
default=None,
help="Maximum size of a single output file in bytes, 0 for auto (not yet implemented)"
help="Maximum (Rough) size of a single output file in bytes, 0 for auto"
)
parser.add_argument(
"--no-html",
Expand Down Expand Up @@ -216,7 +220,10 @@ def main():
elif args.iphone:
import sys
if "--iphone" in sys.argv:
print("WARNING: The --iphone flag is deprecated and will be removed in the future. Use --ios instead.")
# Adjacent string literals are joined with no separator, so the first
# fragment must end with a space; otherwise the message reads "willbe".
print(
    "WARNING: The --iphone flag is deprecated and will "
    "be removed in the future. Use --ios instead."
)
messages = extract_iphone.messages
media = extract_iphone.media
vcard = extract_iphone.vcard
Expand Down Expand Up @@ -271,7 +278,7 @@ def main():
shutil.move(args.media, f"{args.output}/")
except PermissionError:
print("Cannot remove original WhatsApp directory. "
"Perhaps the directory is opened?")
"Perhaps the directory is opened?")

if args.json:
if isinstance(data[next(iter(data))], ChatStore):
Expand Down
9 changes: 8 additions & 1 deletion Whatsapp_Chat_Exporter/data_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,16 @@ def delete_message(self, id):
del self.messages[id]

def to_json(self):
    """Serialize this chat into a JSON-compatible dict.

    Returns:
        A dict with two keys: 'name' holds ``self.name`` and 'messages'
        maps each key of ``self.messages`` to that message's ``to_json()``
        result.
    """
    # Build the mapping exactly once; `msg_id` avoids shadowing the builtin `id`.
    serialized_msgs = {msg_id: msg.to_json() for msg_id, msg in self.messages.items()}
    return {'name': self.name, 'messages': serialized_msgs}

def get_last_message(self):
    """Return the final value yielded by ``self.messages.values()``.

    For a plain dict this is the message that was inserted last.

    Raises:
        IndexError: If the chat holds no messages.
    """
    # values() cannot be indexed directly, so snapshot it into a tuple first.
    return tuple(self.messages.values())[-1]

def get_messages(self):
    """Return ``self.messages.values()``: every stored message, without its id."""
    return self.messages.values()


class Message():
def __init__(self, from_me: Union[bool,int], timestamp: int, time: str, key_id: int):
self.from_me = bool(from_me)
Expand Down
80 changes: 59 additions & 21 deletions Whatsapp_Chat_Exporter/extract.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
from mimetypes import MimeTypes
from hashlib import sha256
from Whatsapp_Chat_Exporter.data_model import ChatStore, Message
from Whatsapp_Chat_Exporter.utility import sanitize_except, determine_day, Crypt
from Whatsapp_Chat_Exporter.utility import MAX_SIZE, ROW_SIZE, rendering, sanitize_except, determine_day, Crypt
from Whatsapp_Chat_Exporter.utility import brute_force_offset, CRYPT14_OFFSETS

try:
Expand Down Expand Up @@ -49,7 +49,7 @@ def _extract_encrypted_key(keyfile):
key_stream += byte.to_bytes(1, "big", signed=True)

return _generate_hmac_of_hmac(key_stream)


def decrypt_backup(database, key, output, crypt=Crypt.CRYPT14, show_crypt15=False):
if not support_backup:
Expand Down Expand Up @@ -82,7 +82,7 @@ def decrypt_backup(database, key, output, crypt=Crypt.CRYPT14, show_crypt15=Fals
raise ValueError("The crypt15 file must be at least 131 bytes")
t1 = t2 = None
iv = database[8:24]
db_offset = database[0] + 2 # Skip protobuf + protobuf size and backup type
db_offset = database[0] + 2 # Skip protobuf + protobuf size and backup type
db_ciphertext = database[db_offset:]

if t1 != t2:
Expand Down Expand Up @@ -253,13 +253,13 @@ def messages(db, data):
if content["key_remote_jid"] not in data:
data[content["key_remote_jid"]] = ChatStore()
if content["key_remote_jid"] is None:
continue # Not sure
continue # Not sure
data[content["key_remote_jid"]].add_message(content["_id"], Message(
from_me=content["key_from_me"],
timestamp=content["timestamp"],
time=content["timestamp"],
key_id=content["key_id"],
))
))
if "-" in content["key_remote_jid"] and content["key_from_me"] == 0:
name = None
if table_message:
Expand Down Expand Up @@ -495,7 +495,7 @@ def vcard(db, data):
ON jid._id = chat.jid_row_id
ORDER BY message.chat_row_id ASC;"""
)

rows = c.fetchall()
total_row_number = len(rows)
print(f"\nGathering vCards...(0/{total_row_number})", end="\r")
Expand Down Expand Up @@ -558,33 +558,71 @@ def create_html(
w3css = os.path.join(offline_static, "w3.css")

for current, contact in enumerate(data):
if len(data[contact].messages) == 0:
chat = data[contact]
if len(chat.messages) == 0:
continue
phone_number = contact.split('@')[0]
if "-" in contact:
file_name = ""
else:
file_name = phone_number

if data[contact].name is not None:
if chat.name is not None:
if file_name != "":
file_name += "-"
file_name += data[contact].name.replace("/", "-")
name = data[contact].name
file_name += chat.name.replace("/", "-")
name = chat.name
else:
name = phone_number
safe_file_name = ''

safe_file_name = "".join(x for x in file_name if x.isalnum() or x in "- ")
with open(f"{output_folder}/{safe_file_name}.html", "w", encoding="utf-8") as f:
f.write(
template.render(
name=name,
msgs=data[contact].messages.values(),
my_avatar=None,
their_avatar=f"WhatsApp/Avatars/{contact}.j",
w3css=w3css
)
)

if maximum_size is not None:
    # Split the chat over several HTML pages of roughly `maximum_size` bytes each.
    current_size = 0
    current_page = 1
    render_box = []  # messages collected for the page currently being filled
    if maximum_size == 0:
        maximum_size = MAX_SIZE  # 0 means "auto": use the default page size
    for message in chat.get_messages():
        # Rough estimate of the bytes this message adds to the rendered page.
        if message.data is not None and not message.meta and not message.media:
            message_size = len(message.data) + ROW_SIZE
        else:
            message_size = ROW_SIZE + 100  # Assume media and meta HTML are 100 bytes
        # Close the current page before this message would overflow it.
        # The `render_box` check prevents an empty page when a single
        # message is larger than the limit on its own.
        if render_box and current_size + message_size > maximum_size:
            output_file_name = f"{output_folder}/{safe_file_name}-{current_page}.html"
            rendering(
                output_file_name,
                template,
                name,
                render_box,
                contact,
                w3css,
                # File name of the following page (presumably rendered as a "next" link).
                f"{safe_file_name}-{current_page + 1}.html"
            )
            render_box = []
            current_size = 0
            current_page += 1
        # Every message, including the very last one, lands on some page.
        render_box.append(message)
        current_size += message_size
    # Always write the trailing page so no message is lost and the previous
    # page's link never points at a missing file.
    if current_page == 1:
        # Everything fit on one page: keep the unsuffixed file name.
        output_file_name = f"{output_folder}/{safe_file_name}.html"
    else:
        output_file_name = f"{output_folder}/{safe_file_name}-{current_page}.html"
    rendering(output_file_name, template, name, render_box, contact, w3css, False)
else:
    # No size limit requested: render the whole chat into a single file.
    output_file_name = f"{output_folder}/{safe_file_name}.html"
    rendering(output_file_name, template, name, chat.get_messages(), contact, w3css, False)
if current % 10 == 0:
print(f"Creating HTML...({current}/{total_row_number})", end="\r")

Expand Down
82 changes: 62 additions & 20 deletions Whatsapp_Chat_Exporter/extract_iphone.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,13 @@

import sqlite3
import json
import string
import jinja2
import os
import shutil
from pathlib import Path
from datetime import datetime
from mimetypes import MimeTypes
from Whatsapp_Chat_Exporter.data_model import ChatStore, Message
from Whatsapp_Chat_Exporter.utility import sanitize_except, determine_day, APPLE_TIME
from Whatsapp_Chat_Exporter.utility import MAX_SIZE, ROW_SIZE, rendering, sanitize_except, determine_day, APPLE_TIME


def messages(db, data):
Expand Down Expand Up @@ -56,7 +54,7 @@ def messages(db, data):
data[_id].add_message(Z_PK, Message(
from_me=content["ZISFROMME"],
timestamp=ts,
time=ts, # Could be bug
time=ts, # TODO: Could be bug
key_id=content["ZSTANZAID"][:17],
))
if "-" in _id and content["ZISFROMME"] == 0:
Expand Down Expand Up @@ -226,7 +224,14 @@ def vcard(db, data):
print(f"Gathering vCards...({index + 1}/{total_row_number})", end="\r")


def create_html(data, output_folder, template=None, embedded=False, offline_static=False, maximum_size=None):
def create_html(
data,
output_folder,
template=None,
embedded=False,
offline_static=False,
maximum_size=None
):
if template is None:
template_dir = os.path.dirname(__file__)
template_file = "whatsapp.html"
Expand Down Expand Up @@ -258,34 +263,71 @@ def create_html(data, output_folder, template=None, embedded=False, offline_stat
w3css = os.path.join(offline_static, "w3.css")

for current, contact in enumerate(data):
if len(data[contact].messages) == 0:
chat = data[contact]
if len(chat.messages) == 0:
continue
phone_number = contact.split('@')[0]
if "-" in contact:
file_name = ""
else:
file_name = phone_number

if data[contact].name is not None:
if chat.name is not None:
if file_name != "":
file_name += "-"
file_name += data[contact].name.replace("/", "-")
name = data[contact].name
file_name += chat.name.replace("/", "-")
name = chat.name
else:
name = phone_number

safe_file_name = ''
safe_file_name = "".join(x for x in file_name if x.isalnum() or x in "- ")
with open(f"{output_folder}/{safe_file_name}.html", "w", encoding="utf-8") as f:
f.write(
template.render(
name=name,
msgs=data[contact].messages.values(),
my_avatar=None,
their_avatar=f"WhatsApp/Avatars/{contact}.j",
w3css=w3css
)
)

if maximum_size is not None:
    # Split the chat over several HTML pages of roughly `maximum_size` bytes each.
    current_size = 0
    current_page = 1
    render_box = []  # messages collected for the page currently being filled
    if maximum_size == 0:
        maximum_size = MAX_SIZE  # 0 means "auto": use the default page size
    for message in chat.get_messages():
        # Rough estimate of the bytes this message adds to the rendered page.
        if message.data is not None and not message.meta and not message.media:
            message_size = len(message.data) + ROW_SIZE
        else:
            message_size = ROW_SIZE + 100  # Assume media and meta HTML are 100 bytes
        # Close the current page before this message would overflow it.
        # The `render_box` check prevents an empty page when a single
        # message is larger than the limit on its own.
        if render_box and current_size + message_size > maximum_size:
            output_file_name = f"{output_folder}/{safe_file_name}-{current_page}.html"
            rendering(
                output_file_name,
                template,
                name,
                render_box,
                contact,
                w3css,
                # File name of the following page (presumably rendered as a "next" link).
                f"{safe_file_name}-{current_page + 1}.html"
            )
            render_box = []
            current_size = 0
            current_page += 1
        # Every message, including the very last one, lands on some page.
        render_box.append(message)
        current_size += message_size
    # Always write the trailing page so no message is lost and the previous
    # page's link never points at a missing file.
    if current_page == 1:
        # Everything fit on one page: keep the unsuffixed file name.
        output_file_name = f"{output_folder}/{safe_file_name}.html"
    else:
        output_file_name = f"{output_folder}/{safe_file_name}-{current_page}.html"
    rendering(output_file_name, template, name, render_box, contact, w3css, False)
else:
    # No size limit requested: render the whole chat into a single file.
    output_file_name = f"{output_folder}/{safe_file_name}.html"
    rendering(output_file_name, template, name, chat.get_messages(), contact, w3css, False)
if current % 10 == 0:
print(f"Creating HTML...({current}/{total_row_number})", end="\r")

Expand Down
Loading

0 comments on commit f63b180

Please sign in to comment.