Skip to content

Commit

Permalink
Implement empty chat filtering from SQL #112
Browse files Browse the repository at this point in the history
This commit also removed the old empty chat filtering logic.
  • Loading branch information
KnugiHK committed Jan 4, 2025
1 parent 92d710b commit 23af55d
Show file tree
Hide file tree
Showing 4 changed files with 39 additions and 30 deletions.
20 changes: 8 additions & 12 deletions Whatsapp_Chat_Exporter/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,8 @@
from Whatsapp_Chat_Exporter import exported_handler, android_handler
from Whatsapp_Chat_Exporter import ios_handler, ios_media_handler
from Whatsapp_Chat_Exporter.data_model import ChatStore
from Whatsapp_Chat_Exporter.utility import APPLE_TIME, Crypt, DbType, chat_is_empty, readable_to_bytes
from Whatsapp_Chat_Exporter.utility import check_update, import_from_json, sanitize_filename, bytes_to_readable
from Whatsapp_Chat_Exporter.utility import APPLE_TIME, Crypt, DbType, readable_to_bytes, check_update
from Whatsapp_Chat_Exporter.utility import import_from_json, sanitize_filename, bytes_to_readable
from argparse import ArgumentParser, SUPPRESS
from datetime import datetime
from sys import exit
Expand Down Expand Up @@ -254,7 +254,9 @@ def main():
dest="filter_empty",
default=True,
action='store_false',
help="By default, the exporter will not render chats with no valid message. Setting this flag will cause the exporter to render those."
help=("By default, the exporter will not render chats with no valid message. "
"Setting this flag will cause the exporter to render those. "
"This is useful if chat(s) are missing from the output")
)
parser.add_argument(
"--per-chat",
Expand Down Expand Up @@ -504,9 +506,9 @@ def main():
if os.path.isfile(msg_db):
with sqlite3.connect(msg_db) as db:
db.row_factory = sqlite3.Row
messages(db, data, args.media, args.timezone_offset, args.filter_date, filter_chat)
media(db, data, args.media, args.filter_date, filter_chat, args.separate_media)
vcard(db, data, args.media, args.filter_date, filter_chat)
messages(db, data, args.media, args.timezone_offset, args.filter_date, filter_chat, args.filter_empty)
media(db, data, args.media, args.filter_date, filter_chat, args.filter_empty, args.separate_media)
vcard(db, data, args.media, args.filter_date, filter_chat, args.filter_empty)
if args.android:
android_handler.calls(db, data, args.timezone_offset, filter_chat)
elif args.ios and args.call_db_ios is not None:
Expand All @@ -525,7 +527,6 @@ def main():
args.offline,
args.size,
args.no_avatar,
args.filter_empty,
args.whatsapp_theme
)
else:
Expand Down Expand Up @@ -563,7 +564,6 @@ def main():
args.offline,
args.size,
args.no_avatar,
args.filter_empty,
args.whatsapp_theme
)
for file in glob.glob(r'*.*'):
Expand All @@ -578,7 +578,6 @@ def main():
args.offline,
args.size,
args.no_avatar,
args.filter_empty,
args.whatsapp_theme
)

Expand All @@ -587,9 +586,6 @@ def main():
android_handler.create_txt(data, args.text_format)

if args.json and not args.import_json:
if args.filter_empty:
data = {k: v for k, v in data.items() if not chat_is_empty(v)}

if args.enrich_from_vcards is not None and not contact_store.is_empty():
contact_store.enrich_from_vcards(data)

Expand Down
33 changes: 24 additions & 9 deletions Whatsapp_Chat_Exporter/android_handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,10 @@
from base64 import b64decode, b64encode
from datetime import datetime
from Whatsapp_Chat_Exporter.data_model import ChatStore, Message
from Whatsapp_Chat_Exporter.utility import CURRENT_TZ_OFFSET, MAX_SIZE, ROW_SIZE, DbType, convert_time_unit, determine_metadata
from Whatsapp_Chat_Exporter.utility import rendering, Crypt, Device, get_file_name, setup_template, JidType
from Whatsapp_Chat_Exporter.utility import CURRENT_TZ_OFFSET, MAX_SIZE, ROW_SIZE, DbType, convert_time_unit, determine_metadata, get_cond_for_empty
from Whatsapp_Chat_Exporter.utility import rendering, Crypt, Device, get_file_name, setup_template
from Whatsapp_Chat_Exporter.utility import brute_force_offset, CRYPT14_OFFSETS, get_status_location
from Whatsapp_Chat_Exporter.utility import get_chat_condition, slugify, bytes_to_readable, chat_is_empty
from Whatsapp_Chat_Exporter.utility import get_chat_condition, slugify, bytes_to_readable, JidType

try:
import zlib
Expand Down Expand Up @@ -173,15 +173,18 @@ def contacts(db, data):
row = c.fetchone()


def messages(db, data, media_folder, timezone_offset, filter_date, filter_chat):
def messages(db, data, media_folder, timezone_offset, filter_date, filter_chat, filter_empty):
# Get message history
c = db.cursor()
try:
c.execute(f"""SELECT count()
FROM messages
INNER JOIN jid
ON messages.key_remote_jid = jid.raw_string
LEFT JOIN chat
ON chat.jid_row_id = jid._id
WHERE 1=1
{get_cond_for_empty(filter_empty, "messages.key_remote_jid", "messages.needs_push")}
{f'AND timestamp {filter_date}' if filter_date is not None else ''}
{get_chat_condition(filter_chat[0], True, ["messages.key_remote_jid", "messages.remote_resource"], "jid", "android")}
{get_chat_condition(filter_chat[1], False, ["messages.key_remote_jid", "messages.remote_resource"], "jid", "android")}""")
Expand All @@ -196,6 +199,7 @@ def messages(db, data, media_folder, timezone_offset, filter_date, filter_chat):
LEFT JOIN jid jid_group
ON jid_group._id = message.sender_jid_row_id
WHERE 1=1
{get_cond_for_empty(filter_empty, "jid.raw_string", "broadcast")}
{f'AND timestamp {filter_date}' if filter_date is not None else ''}
{get_chat_condition(filter_chat[0], True, ["jid.raw_string", "jid_group.raw_string"], "jid", "android")}
{get_chat_condition(filter_chat[1], False, ["jid.raw_string", "jid_group.raw_string"], "jid", "android")}""")
Expand Down Expand Up @@ -253,6 +257,7 @@ def messages(db, data, media_folder, timezone_offset, filter_date, filter_chat):
LEFT JOIN receipt_user
ON receipt_user.message_row_id = messages._id
WHERE messages.key_remote_jid <> '-1'
{get_cond_for_empty(filter_empty, "messages.key_remote_jid", "messages.needs_push")}
{f'AND messages.timestamp {filter_date}' if filter_date is not None else ''}
{get_chat_condition(filter_chat[0], True, ["messages.key_remote_jid", "messages.remote_resource"], "jid_global", "android")}
{get_chat_condition(filter_chat[1], False, ["messages.key_remote_jid", "messages.remote_resource"], "jid_global", "android")}
Expand Down Expand Up @@ -321,6 +326,7 @@ def messages(db, data, media_folder, timezone_offset, filter_date, filter_chat):
LEFT JOIN receipt_user
ON receipt_user.message_row_id = message._id
WHERE key_remote_jid <> '-1'
{get_cond_for_empty(filter_empty, "key_remote_jid", "broadcast")}
{f'AND message.timestamp {filter_date}' if filter_date is not None else ''}
{get_chat_condition(filter_chat[0], True, ["key_remote_jid", "jid_group.raw_string"], "jid_global", "android")}
{get_chat_condition(filter_chat[1], False, ["key_remote_jid", "jid_group.raw_string"], "jid_global", "android")}
Expand Down Expand Up @@ -488,7 +494,7 @@ def messages(db, data, media_folder, timezone_offset, filter_date, filter_chat):
print(f"Processing messages...({total_row_number}/{total_row_number})", end="\r")


def media(db, data, media_folder, filter_date, filter_chat, separate_media=True):
def media(db, data, media_folder, filter_date, filter_chat, filter_empty, separate_media=True):
# Get media
c = db.cursor()
try:
Expand All @@ -498,7 +504,10 @@ def media(db, data, media_folder, filter_date, filter_chat, separate_media=True)
ON message_media.message_row_id = messages._id
INNER JOIN jid
ON messages.key_remote_jid = jid.raw_string
LEFT JOIN chat
ON chat.jid_row_id = jid._id
WHERE 1=1
{get_cond_for_empty(filter_empty, "key_remote_jid", "messages.needs_push")}
{f'AND messages.timestamp {filter_date}' if filter_date is not None else ''}
{get_chat_condition(filter_chat[0], True, ["messages.key_remote_jid", "remote_resource"], "jid", "android")}
{get_chat_condition(filter_chat[1], False, ["messages.key_remote_jid", "remote_resource"], "jid", "android")}""")
Expand All @@ -514,6 +523,7 @@ def media(db, data, media_folder, filter_date, filter_chat, separate_media=True)
LEFT JOIN jid jid_group
ON jid_group._id = message.sender_jid_row_id
WHERE 1=1
{get_cond_for_empty(filter_empty, "jid.raw_string", "broadcast")}
{f'AND message.timestamp {filter_date}' if filter_date is not None else ''}
{get_chat_condition(filter_chat[0], True, ["jid.raw_string", "jid_group.raw_string"], "jid", "android")}
{get_chat_condition(filter_chat[1], False, ["jid.raw_string", "jid_group.raw_string"], "jid", "android")}""")
Expand All @@ -536,7 +546,10 @@ def media(db, data, media_folder, filter_date, filter_chat, separate_media=True)
ON message_media.file_hash = media_hash_thumbnail.media_hash
INNER JOIN jid
ON messages.key_remote_jid = jid.raw_string
LEFT JOIN chat
ON chat.jid_row_id = jid._id
WHERE jid.type <> 7
{get_cond_for_empty(filter_empty, "key_remote_jid", "broadcast")}
{f'AND messages.timestamp {filter_date}' if filter_date is not None else ''}
{get_chat_condition(filter_chat[0], True, ["messages.key_remote_jid", "remote_resource"], "jid", "android")}
{get_chat_condition(filter_chat[1], False, ["messages.key_remote_jid", "remote_resource"], "jid", "android")}
Expand All @@ -563,6 +576,7 @@ def media(db, data, media_folder, filter_date, filter_chat, separate_media=True)
LEFT JOIN jid jid_group
ON jid_group._id = message.sender_jid_row_id
WHERE jid.type <> 7
{get_cond_for_empty(filter_empty, "key_remote_jid", "broadcast")}
{f'AND message.timestamp {filter_date}' if filter_date is not None else ''}
{get_chat_condition(filter_chat[0], True, ["key_remote_jid", "jid_group.raw_string"], "jid", "android")}
{get_chat_condition(filter_chat[1], False, ["key_remote_jid", "jid_group.raw_string"], "jid", "android")}
Expand Down Expand Up @@ -613,7 +627,7 @@ def media(db, data, media_folder, filter_date, filter_chat, separate_media=True)
f"Processing media...({total_row_number}/{total_row_number})", end="\r")


def vcard(db, data, media_folder, filter_date, filter_chat):
def vcard(db, data, media_folder, filter_date, filter_chat, filter_empty):
c = db.cursor()
try:
c.execute(f"""SELECT message_row_id,
Expand All @@ -625,7 +639,10 @@ def vcard(db, data, media_folder, filter_date, filter_chat):
ON messages_vcards.message_row_id = messages._id
INNER JOIN jid
ON messages.key_remote_jid = jid.raw_string
LEFT JOIN chat
ON chat.jid_row_id = jid._id
WHERE 1=1
{get_cond_for_empty(filter_empty, "key_remote_jid", "messages.needs_push")}
{f'AND messages.timestamp {filter_date}' if filter_date is not None else ''}
{get_chat_condition(filter_chat[0], True, ["messages.key_remote_jid", "remote_resource"], "jid", "android")}
{get_chat_condition(filter_chat[1], False, ["messages.key_remote_jid", "remote_resource"], "jid", "android")}
Expand All @@ -646,6 +663,7 @@ def vcard(db, data, media_folder, filter_date, filter_chat):
LEFT JOIN jid jid_group
ON jid_group._id = message.sender_jid_row_id
WHERE 1=1
{get_cond_for_empty(filter_empty, "key_remote_jid", "broadcast")}
{f'AND message.timestamp {filter_date}' if filter_date is not None else ''}
{get_chat_condition(filter_chat[0], True, ["key_remote_jid", "jid_group.raw_string"], "jid", "android")}
{get_chat_condition(filter_chat[1], False, ["key_remote_jid", "jid_group.raw_string"], "jid", "android")}
Expand Down Expand Up @@ -760,7 +778,6 @@ def create_html(
offline_static=False,
maximum_size=None,
no_avatar=False,
filter_empty=True,
experimental=False
):
template = setup_template(template, no_avatar, experimental)
Expand All @@ -775,8 +792,6 @@ def create_html(

for current, contact in enumerate(data):
chat = data[contact]
if filter_empty and chat_is_empty(chat):
continue
safe_file_name, name = get_file_name(contact, chat)

if maximum_size is not None:
Expand Down
6 changes: 3 additions & 3 deletions Whatsapp_Chat_Exporter/ios_handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ def contacts(db, data):
content = c.fetchone()


def messages(db, data, media_folder, timezone_offset, filter_date, filter_chat):
def messages(db, data, media_folder, timezone_offset, filter_date, filter_chat, filter_empty):
c = db.cursor()
# Get contacts
c.execute(
Expand Down Expand Up @@ -227,7 +227,7 @@ def messages(db, data, media_folder, timezone_offset, filter_date, filter_chat):
f"Processing messages...({total_row_number}/{total_row_number})", end="\r")


def media(db, data, media_folder, filter_date, filter_chat, separate_media=False):
def media(db, data, media_folder, filter_date, filter_chat, filter_empty, separate_media=False):
c = db.cursor()
# Get media
c.execute(f"""SELECT count()
Expand Down Expand Up @@ -308,7 +308,7 @@ def media(db, data, media_folder, filter_date, filter_chat, separate_media=False
f"Processing media...({total_row_number}/{total_row_number})", end="\r")


def vcard(db, data, media_folder, filter_date, filter_chat):
def vcard(db, data, media_folder, filter_date, filter_chat, filter_empty):
c = db.cursor()
c.execute(f"""SELECT DISTINCT ZWAVCARDMENTION.ZMEDIAITEM,
ZWAMEDIAITEM.ZMESSAGE,
Expand Down
10 changes: 4 additions & 6 deletions Whatsapp_Chat_Exporter/utility.py
Original file line number Diff line number Diff line change
Expand Up @@ -220,6 +220,10 @@ def get_file_name(contact: str, chat: ChatStore):
return sanitize_filename(file_name), name


def get_cond_for_empty(enable: bool, jid_field: str, broadcast_field: str) -> str:
    """Build the SQL ``AND`` clause used to filter out empty chats.

    The returned fragment is meant to be appended to an existing ``WHERE``
    clause. It keeps a row when at least one of the following holds:

    * ``chat.sort_timestamp`` is not NULL,
    * ``jid_field`` equals ``'status@broadcast'``,
    * ``broadcast_field`` is greater than 0.

    The fragment refers to ``chat.sort_timestamp`` directly, so the query it
    is embedded in must expose a table (or alias) named ``chat``.

    Args:
        enable: When falsy, no filtering is applied and an empty string is
            returned.
        jid_field: Column expression compared against ``'status@broadcast'``.
        broadcast_field: Column expression compared with ``> 0``.

    Returns:
        The SQL fragment starting with ``AND``, or ``""`` when disabled.

    Note:
        Both field arguments are interpolated into the SQL text verbatim.
        Pass only trusted, hard-coded column names, never user input.
    """
    if not enable:
        return ""
    return (
        "AND (chat.sort_timestamp IS NOT NULL "
        f"OR {jid_field}='status@broadcast' "
        f"OR {broadcast_field}>0)"
    )


def get_chat_condition(filter, include, columns, jid=None, platform=None):
if filter is not None:
conditions = []
Expand All @@ -245,12 +249,6 @@ def get_chat_condition(filter, include, columns, jid=None, platform=None):
else:
return ""

def _is_message_empty(message):
    """Return True when *message* has no text and no media.

    A message counts as empty when its ``data`` attribute is ``None`` or the
    empty string and its ``media`` attribute is falsy.
    """
    return (message.data is None or message.data == "") and not message.media


def chat_is_empty(chat: "ChatStore"):
    """Return True when *chat* contains no message with text or media.

    Args:
        chat: Object whose ``messages`` attribute is a mapping; only its
            values are inspected.

    Returns:
        True if the chat has no messages at all, or if every message is
        empty according to ``_is_message_empty``; False otherwise.
    """
    # all() over an empty iterable is True, so a chat without any messages
    # is reported as empty without a separate length check.
    return all(_is_message_empty(message) for message in chat.messages.values())


# Android Specific
CRYPT14_OFFSETS = (
Expand Down

0 comments on commit 23af55d

Please sign in to comment.