Skip to content

Commit

Permalink
[bilibili.space] Send requests directly through headless browser
Browse files Browse the repository at this point in the history
  • Loading branch information
SpriteOvO committed Mar 6, 2025
1 parent fe74531 commit 54b3568
Show file tree
Hide file tree
Showing 6 changed files with 142 additions and 80 deletions.
3 changes: 2 additions & 1 deletion Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ anyhow = "1.0.89"
bytes = "1.7.2"
chrono = "0.4.39"
clap = { version = "4.5.19", features = ["derive"] }
const_format = "0.2.34"
futures = "0.3.31"
headless_chrome = "1.0.15"
humantime-serde = "1.1.1"
Expand Down
23 changes: 22 additions & 1 deletion src/helper.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ pub fn reqwest_client_with(
.timeout(Duration::from_secs(60) * 3)
.default_headers(HeaderMap::from_iter([(
header::USER_AGENT,
HeaderValue::from_str(&prop::user_agent(false)).unwrap(),
HeaderValue::from_str(&prop::UserAgent::Logo.as_str()).unwrap(),
)])),
)
.build()
Expand Down Expand Up @@ -51,3 +51,24 @@ macro_rules! serde_impl_default_for {
}
};
}

/// Scope guard that runs a closure exactly once, when the guard is dropped.
///
/// Bind the guard to a named variable (for example `let _guard = Defer::new(..)`)
/// so that it lives until the end of the enclosing scope. A guard that is not
/// bound to a variable is dropped, and therefore fires, immediately.
#[must_use = "an unbound `Defer` is dropped immediately, running its closure right away"]
pub struct Defer<F: FnOnce()>(Option<F>);

impl<F: FnOnce()> Defer<F> {
    /// Wraps `f` so that it is invoked when the returned guard is dropped.
    pub fn new(f: F) -> Self {
        Self(Some(f))
    }
}

impl<F: FnOnce()> Drop for Defer<F> {
    fn drop(&mut self) {
        // The closure is `FnOnce`, so it has to be moved out of `self` before it
        // can be called; `Option::take` does that through `&mut self`.
        if let Some(f) = self.0.take() {
            f();
        }
    }
}

/// Schedules the given statements to run when the enclosing scope ends.
///
/// The statements are wrapped in a closure and handed to
/// `$crate::helper::Defer::new`; the resulting guard is stored in a local
/// binding at the call site, so the closure runs when that binding is dropped.
#[macro_export]
macro_rules! defer {
    ($($body:tt)*) => {
        let _defer_guard = $crate::helper::Defer::new(|| {
            $($body)*
        });
    };
}
61 changes: 48 additions & 13 deletions src/prop.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
use std::borrow::Cow;

use const_format::formatcp;
use rand::distributions::{Alphanumeric, DistString};

pub struct Package {
Expand All @@ -12,19 +13,53 @@ pub const PACKAGE: Package = Package {
version: env!("CARGO_PKG_VERSION"),
};

pub fn user_agent(dynamic: bool) -> String {
format!(
"{}/{} (FAIR USE, PLEASE DO NOT BLOCK. Source opened on github.com/SpriteOvO/{}{})",
env!("CARGO_PKG_NAME"),
env!("CARGO_PKG_VERSION"),
env!("CARGO_PKG_NAME"),
if dynamic {
Cow::Owned(format!(
". {}",
/// Selects which `User-Agent` string `as_str` produces.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum UserAgent {
    /// The project's own identifying string: package name and version followed
    /// by a parenthesised fair-use notice that points at the source repository.
    Logo,
    /// The `Logo` string with a space and a random 8-character alphanumeric
    /// token inserted before the closing parenthesis.
    LogoDynamic,
    /// A fixed string in the format of a desktop Firefox browser on Windows.
    Mocked,
}

impl UserAgent {
pub fn as_str(&self) -> Cow<str> {
match self {
Self::Logo => Cow::Borrowed(formatcp!(
"{}/{} (FAIR USE, PLEASE DO NOT BLOCK. Source opened on github.com/SpriteOvO/{})",
PACKAGE.name,
PACKAGE.version,
PACKAGE.name,
)),
Self::LogoDynamic => Cow::Owned(format!(
"{} {})",
Self::Logo.as_str().strip_suffix(')').unwrap(),
Alphanumeric.sample_string(&mut rand::thread_rng(), 8)
))
} else {
Cow::Borrowed("")
)),
Self::Mocked => Cow::Borrowed(
"Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:136.0) Gecko/20100101 Firefox/136.0",
),
}
)
}
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Pins the exact `Logo` string and the overall shape of `LogoDynamic`.
    #[test]
    fn user_agent() {
        let expected_logo = format!(
            "closely/{} (FAIR USE, PLEASE DO NOT BLOCK. Source opened on github.com/SpriteOvO/closely)",
            env!("CARGO_PKG_VERSION"),
        );
        assert_eq!(UserAgent::Logo.as_str(), expected_logo);

        // The dynamic variant keeps everything before the closing parenthesis,
        // then appends a space, a random token and the parenthesis again.
        let dynamic_prefix = format!(
            "closely/{} (FAIR USE, PLEASE DO NOT BLOCK. Source opened on github.com/SpriteOvO/closely ",
            env!("CARGO_PKG_VERSION"),
        );
        let dynamic = UserAgent::LogoDynamic.as_str();
        assert!(dynamic.starts_with(&dynamic_prefix));
        assert!(dynamic.ends_with(")"));

        // One separating space plus the 8-character random token.
        let logo_len = UserAgent::Logo.as_str().len();
        assert_eq!(dynamic.len(), logo_len + 9);
    }
}
2 changes: 1 addition & 1 deletion src/source/platform/bilibili/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ fn bilibili_request_builder() -> anyhow::Result<reqwest::Client> {
helper::reqwest_client_with(|builder| {
builder.default_headers(HeaderMap::from_iter([(
header::USER_AGENT,
HeaderValue::from_str(&prop::user_agent(true)).unwrap(),
HeaderValue::from_str(&prop::UserAgent::LogoDynamic.as_str()).unwrap(),
)]))
})
}
Expand Down
132 changes: 68 additions & 64 deletions src/source/platform/bilibili/space.rs
Original file line number Diff line number Diff line change
@@ -1,16 +1,23 @@
use std::{collections::HashSet, fmt, fmt::Display, future::Future, ops::DerefMut, pin::Pin};
use std::{
collections::HashSet,
fmt::{self, Display},
future::Future,
ops::DerefMut,
pin::Pin,
sync::{Arc, Mutex as StdMutex},
};

use anyhow::{anyhow, bail, ensure};
use chrono::DateTime;
use once_cell::sync::Lazy;
use reqwest::header::{self, HeaderValue};
use serde::Deserialize;
use serde_json::{self as json};
use spdlog::prelude::*;
use tokio::sync::Mutex;

use super::*;
use crate::{
defer,
platform::{PlatformMetadata, PlatformTrait},
source::{
FetcherTrait, Post, PostAttachment, PostAttachmentImage, PostContent, PostUrl, PostUrls,
Expand Down Expand Up @@ -477,71 +484,28 @@ impl Fetcher {
// Fans-only posts
struct BlockedPostIds(HashSet<String>);

#[allow(clippy::type_complexity)] // No, I don't think it's complex XD
static GUEST_COOKIES: Lazy<Mutex<Option<Vec<(String, String)>>>> = Lazy::new(|| Mutex::new(None));

async fn fetch_space_history(user_id: u64, blocked: &mut BlockedPostIds) -> anyhow::Result<Posts> {
fetch_space_history_impl(user_id, blocked, true).await
fetch_space_history_impl(user_id, blocked).await
}

fn fetch_space_history_impl<'a>(
user_id: u64,
blocked: &'a mut BlockedPostIds,
retry: bool,
) -> Pin<Box<dyn Future<Output = anyhow::Result<Posts>> + Send + 'a>> {
Box::pin(async move {
let mut guest_cookies = GUEST_COOKIES.lock().await;
if guest_cookies.is_none() {
*guest_cookies = Some(
obtain_guest_cookies()
.await
.map_err(|err| anyhow!("failed to obtain guest cookies: {err}"))?,
);
}
let cookies = guest_cookies
.as_ref()
.unwrap()
.iter()
.map(|(name, value)| format!("{}={}", name, value))
.collect::<Vec<_>>()
.join("; ");
let resp = bilibili_request_builder()?
.get(format!(
"https://api.bilibili.com/x/polymer/web-dynamic/v1/feed/space?host_mid={}&features=itemOpusStyle,listOnlyfans,opusBigCover,onlyfansVote,decorationCard,forwardListHidden,ugcDelete,onlyfansQaCard&dm_img_list=[]&dm_img_str=V2ViR0wgMS&dm_cover_img_str=REDACTED",
user_id
))
.header(header::COOKIE, HeaderValue::from_str(&cookies)?)
.send()
let (status, text) = fetch_space(user_id)
.await
.map_err(|err| anyhow!("failed to send request: {err}"))?;

let status = resp.status();
if !status.is_success() {
bail!("response status is not success: {resp:?}");
if status != 200 {
bail!("response status is not success: {text:?}");
}

let text = resp
.text()
.await
.map_err(|err| anyhow!("failed to obtain text from response: {err}"))?;

let resp: Response<data::SpaceHistory> = json::from_str(&text)
.map_err(|err| anyhow!("failed to deserialize response: {err}"))?;

match resp.code {
0 => {} // Success
-352 => {
// Auth error
if retry {
// Invalidate the guest cookies and retry
*guest_cookies = None;
drop(guest_cookies);
warn!("bilibili guest token expired, retrying with new token");
return fetch_space_history_impl(user_id, blocked, false).await;
} else {
bail!("bilibili failed with token expired, and already retried once")
}
}
-352 => bail!("auth error"),
_ => bail!("response contains error, response '{text}'"),
}

Expand Down Expand Up @@ -802,26 +766,66 @@ fn parse_response(resp: data::SpaceHistory, blocked: &mut BlockedPostIds) -> any
Ok(Posts(items))
}

async fn obtain_guest_cookies() -> anyhow::Result<Vec<(String, String)>> {
async fn fetch_space(user_id: u64) -> anyhow::Result<(u32, String)> {
// Okay, I gave up on cracking the auth process
use headless_chrome::{Browser, LaunchOptionsBuilder};

let browser = Browser::new(
LaunchOptionsBuilder::default()
// https://github.com/rust-headless-chrome/rust-headless-chrome/issues/267
.sandbox(false)
.build()?,
static BROWSER: Lazy<Browser> = Lazy::new(|| {
Browser::new(
LaunchOptionsBuilder::default()
// https://github.com/rust-headless-chrome/rust-headless-chrome/issues/267
.sandbox(false)
.headless(false)
.build()
.unwrap(),
)
.unwrap()
});

let tab = BROWSER.new_tab()?;
defer! {
_ = tab
.close(true)
.inspect_err(|err| warn!("bilibili headless browser: failed to close tab: {err}"));
}

let body_res = Arc::new(StdMutex::new(None));
tab.register_response_handling(
"",
Box::new({
let body_res = Arc::clone(&body_res);
move |event, fetch_body| {
if event
.response
.url
.starts_with("https://api.bilibili.com/x/polymer/web-dynamic/v1/feed/space")
{
*body_res.lock().unwrap() = Some((event.response.status, fetch_body()));
}
}
}),
)?;
let tab = browser.new_tab()?;
tab.navigate_to("https://space.bilibili.com/8047632/dynamic")?;

// To Bilibili Dev:
//
// If you are seeing this, please let me know an appropriate rate to request via
// email. This project is not intended to be a bad thing, just for personal use.
//
tab.set_user_agent(&prop::UserAgent::Mocked.as_str(), None, None)?;
tab.navigate_to(&format!("https://space.bilibili.com/{}/dynamic", user_id))?;
tab.wait_until_navigated()?;

let kvs = tab
.get_cookies()?
.into_iter()
.map(|cookie| (cookie.name, cookie.value))
.collect();
Ok(kvs)
let mut body_res = body_res.lock().unwrap();
let (status, body) = body_res
.take()
.ok_or_else(|| anyhow!("headless browser did not catch the expected response"))?;
let body = body.map_err(|err| anyhow!("headless browser failed to fetch the body: {err}"))?;
ensure!(
!body.base_64_encoded,
"headless browser returned a base64 encoded body"
);

Ok((status, body.body))
}

#[cfg(test)]
Expand Down

0 comments on commit 54b3568

Please sign in to comment.