Skip to content

Commit 5eebf7f

Browse files
authored
Merge pull request #1376 from omegaatt36/master
feat: support www.threads.net
2 parents a7fc780 + b43a4aa commit 5eebf7f

File tree

5 files changed

+240
-0
lines changed

5 files changed

+240
-0
lines changed

.github/workflows/stream_threads.yml

+31
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
name: threads
2+
3+
on:
4+
push:
5+
paths:
6+
- "extractors/threads/*.go"
7+
- ".github/workflows/stream_threads.yml"
8+
pull_request:
9+
paths:
10+
- "extractors/threads/*.go"
11+
- ".github/workflows/stream_threads.yml"
12+
schedule:
13+
# run ci weekly
14+
- cron: "0 0 * * 0"
15+
16+
jobs:
17+
test:
18+
runs-on: ${{ matrix.os }}
19+
strategy:
20+
matrix:
21+
go: ["1.22"]
22+
os: [ubuntu-latest]
23+
name: ${{ matrix.os }}
24+
steps:
25+
- uses: actions/checkout@v4
26+
- uses: actions/setup-go@v5
27+
with:
28+
go-version: ${{ matrix.go }}
29+
30+
- name: Test
31+
run: go test -timeout 5m -race -coverpkg=./... -coverprofile=coverage.txt github.com/iawia002/lux/extractors/threads

README.md

+1
Original file line numberDiff line numberDiff line change
@@ -622,6 +622,7 @@ $ lux -j "https://www.bilibili.com/video/av20203945"
622622
| 秒拍 | <https://www.miaopai.com> || | | | | [![miaopai](https://github.com/iawia002/lux/actions/workflows/stream_miaopai.yml/badge.svg)](https://github.com/iawia002/lux/actions/workflows/stream_miaopai.yml) |
623623
| 微博 | <https://weibo.com> || | | | | [![weibo](https://github.com/iawia002/lux/actions/workflows/stream_weibo.yml/badge.svg)](https://github.com/iawia002/lux/actions/workflows/stream_weibo.yml) |
624624
| Instagram | <https://www.instagram.com> ||| | | | [![instagram](https://github.com/iawia002/lux/actions/workflows/stream_instagram.yml/badge.svg)](https://github.com/iawia002/lux/actions/workflows/stream_instagram.yml) |
625+
| Threads | <https://www.threads.net> ||| | | | [![threads](https://github.com/iawia002/lux/actions/workflows/stream_threads.yml/badge.svg)](https://github.com/iawia002/lux/actions/workflows/stream_threads.yml) |
625626
| Twitter | <https://twitter.com> || | | | | [![twitter](https://github.com/iawia002/lux/actions/workflows/stream_twitter.yml/badge.svg)](https://github.com/iawia002/lux/actions/workflows/stream_twitter.yml) |
626627
| 腾讯视频 | <https://v.qq.com> || | | | | [![qq](https://github.com/iawia002/lux/actions/workflows/stream_qq.yml/badge.svg)](https://github.com/iawia002/lux/actions/workflows/stream_qq.yml) |
627628
| 网易云音乐 | <https://music.163.com> || | | | | [![netease](https://github.com/iawia002/lux/actions/workflows/stream_netease.yml/badge.svg)](https://github.com/iawia002/lux/actions/workflows/stream_netease.yml) |

app/register.go

+1
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ import (
2828
_ "github.com/iawia002/lux/extractors/rumble"
2929
_ "github.com/iawia002/lux/extractors/streamtape"
3030
_ "github.com/iawia002/lux/extractors/tangdou"
31+
_ "github.com/iawia002/lux/extractors/threads"
3132
_ "github.com/iawia002/lux/extractors/tiktok"
3233
_ "github.com/iawia002/lux/extractors/tumblr"
3334
_ "github.com/iawia002/lux/extractors/twitter"

extractors/threads/threads.go

+151
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,151 @@
1+
package threads
2+
3+
import (
4+
"fmt"
5+
"net"
6+
"net/http"
7+
netURL "net/url"
8+
"strings"
9+
"time"
10+
11+
"github.com/gocolly/colly/v2"
12+
"github.com/pkg/errors"
13+
14+
"github.com/iawia002/lux/extractors"
15+
"github.com/iawia002/lux/request"
16+
"github.com/iawia002/lux/utils"
17+
)
18+
19+
func init() {
20+
extractors.Register("threads", New())
21+
}
22+
23+
// extractor is the Threads implementation registered with the extractors
// package. It carries the HTTP client that Extract hands to its collector.
type extractor struct {
	// client performs the HTTP requests issued while visiting a post.
	client *http.Client
}
26+
27+
// New returns a instagram extractor.
28+
func New() extractors.Extractor {
29+
return &extractor{
30+
client: &http.Client{
31+
Timeout: 10 * time.Second,
32+
Transport: &http.Transport{
33+
Dial: (&net.Dialer{
34+
Timeout: 5 * time.Second,
35+
}).Dial,
36+
TLSHandshakeTimeout: 5 * time.Second,
37+
},
38+
},
39+
}
40+
}
41+
42+
type media struct {
43+
URL string
44+
Type extractors.DataType
45+
}
46+
47+
// Extract is the main function to extract the data.
48+
func (e *extractor) Extract(url string, option extractors.Options) ([]*extractors.Data, error) {
49+
URL, err := netURL.Parse(url)
50+
if err != nil {
51+
return nil, errors.WithStack(err)
52+
}
53+
54+
paths := strings.Split(URL.Path, "/")
55+
if len(paths) < 3 {
56+
return nil, errors.New("invalid URL format")
57+
}
58+
59+
poster := paths[1]
60+
shortCode := paths[3]
61+
62+
medias := make([]media, 0)
63+
64+
title := fmt.Sprintf("Threads %s - %s", poster, shortCode)
65+
66+
collector := colly.NewCollector()
67+
collector.SetClient(e.client)
68+
69+
// case single image or video
70+
collector.OnHTML("div.SingleInnerMediaContainer", func(e *colly.HTMLElement) {
71+
if src := e.ChildAttr("img", "src"); src != "" {
72+
medias = append(medias, media{
73+
URL: src,
74+
Type: extractors.DataTypeImage,
75+
})
76+
}
77+
if src := e.ChildAttr("video > source", "src"); src != "" {
78+
medias = append(medias, media{
79+
URL: src,
80+
Type: extractors.DataTypeVideo,
81+
})
82+
}
83+
})
84+
85+
// case multiple image or video
86+
collector.OnHTML("div.MediaScrollImageContainer", func(e *colly.HTMLElement) {
87+
if src := e.ChildAttr("img", "src"); src != "" {
88+
medias = append(medias, media{
89+
URL: src,
90+
Type: extractors.DataTypeImage,
91+
})
92+
}
93+
if src := e.ChildAttr("video > source", "src"); src != "" {
94+
medias = append(medias, media{
95+
URL: src,
96+
Type: extractors.DataTypeVideo,
97+
})
98+
}
99+
})
100+
101+
// title with caption
102+
// collector.OnHTML("span.BodyTextContainer", func(e *colly.HTMLElement) {
103+
// title = e.Text
104+
// })
105+
106+
if err := collector.Visit(URL.JoinPath("embed").String()); err != nil {
107+
return nil, fmt.Errorf("failed to send HTTP request to the Threads: %w", errors.WithStack(err))
108+
}
109+
110+
var totalSize int64
111+
var parts []*extractors.Part
112+
113+
for _, m := range medias {
114+
_, ext, err := utils.GetNameAndExt(m.URL)
115+
if err != nil {
116+
return nil, errors.WithStack(err)
117+
}
118+
fileSize, err := request.Size(m.URL, url)
119+
if err != nil {
120+
return nil, errors.WithStack(err)
121+
}
122+
123+
part := &extractors.Part{
124+
URL: m.URL,
125+
Size: fileSize,
126+
Ext: ext,
127+
}
128+
parts = append(parts, part)
129+
}
130+
131+
for _, part := range parts {
132+
totalSize += part.Size
133+
}
134+
135+
streams := map[string]*extractors.Stream{
136+
"default": {
137+
Parts: parts,
138+
Size: totalSize,
139+
},
140+
}
141+
142+
return []*extractors.Data{
143+
{
144+
Site: "Threads www.threads.net",
145+
Title: title,
146+
Type: extractors.DataTypeImage,
147+
Streams: streams,
148+
URL: url,
149+
},
150+
}, nil
151+
}

extractors/threads/threads_test.go

+56
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
package threads_test
2+
3+
import (
4+
"testing"
5+
6+
"github.com/iawia002/lux/extractors"
7+
"github.com/iawia002/lux/extractors/threads"
8+
"github.com/iawia002/lux/test"
9+
)
10+
11+
func TestDownload(t *testing.T) {
12+
tests := []struct {
13+
name string
14+
args test.Args
15+
}{
16+
{
17+
name: "video test",
18+
args: test.Args{
19+
URL: "https://www.threads.net/@rowancheung/post/C9xPmHcpfiN",
20+
Title: `Threads @rowancheung - C9xPmHcpfiN`,
21+
Size: 5740684,
22+
},
23+
},
24+
{
25+
name: "video shared test",
26+
args: test.Args{
27+
URL: "https://www.threads.net/@zuck/post/C9xRqbNPbx2",
28+
Title: `Threads @zuck - C9xRqbNPbx2`,
29+
Size: 5740684,
30+
},
31+
},
32+
{
33+
name: "image test",
34+
args: test.Args{
35+
URL: "https://www.threads.net/@zuck/post/C-BoS7lM8sH",
36+
Title: `Threads @zuck - C-BoS7lM8sH`,
37+
Size: 159331,
38+
},
39+
},
40+
{
41+
name: "hybrid album test",
42+
args: test.Args{
43+
URL: "https://www.threads.net/@meta/post/C95Z1DrPNhi",
44+
Title: `Threads @meta - C95Z1DrPNhi`,
45+
Size: 1131229,
46+
},
47+
},
48+
}
49+
for _, tt := range tests {
50+
t.Run(tt.name, func(t *testing.T) {
51+
data, err := threads.New().Extract(tt.args.URL, extractors.Options{})
52+
test.CheckError(t, err)
53+
test.Check(t, tt.args, data[0])
54+
})
55+
}
56+
}

0 commit comments

Comments
 (0)