1
1
import { Buffer } from 'node:buffer'
2
2
import { createHash } from 'node:crypto'
3
+ import dns from 'node:dns'
4
+ import http from 'node:http'
3
5
import https from 'node:https'
4
6
import { join } from 'node:path'
5
7
import axios from 'axios'
6
8
import { ensureDirSync } from 'fs-extra'
7
9
import sanitize from 'sanitize-filename'
8
10
import slugify from 'slugify'
9
11
import { joinURL , withLeadingSlash , withoutLeadingSlash , withoutTrailingSlash , withTrailingSlash } from 'ufo'
10
- import type { AxiosRequestConfig , AxiosResponse } from 'axios'
11
- import { useUnlighthouse } from './unlighthouse'
12
+ import type { AxiosInstance , AxiosRequestConfig , AxiosResponse } from 'axios'
13
+ import { useLogger , useUnlighthouse } from './unlighthouse'
12
14
import type { NormalisedRoute , ResolvedUserConfig , UnlighthouseRouteReport } from './types'
13
15
14
16
export const ReportArtifacts = {
@@ -121,12 +123,30 @@ export function formatBytes(bytes: number, decimals = 2) {
121
123
return `${ Number . parseFloat ( ( bytes / k ** i ) . toFixed ( dm ) ) } ${ sizes [ i ] } `
122
124
}
123
125
124
- export async function fetchUrlRaw ( url : string , resolvedConfig : ResolvedUserConfig ) : Promise < { error ?: any , redirected ?: boolean , redirectUrl ?: string , valid : boolean , response ?: AxiosResponse } > {
126
// Module-level fallback store, used when useUnlighthouse() yields nothing.
const _sharedContext = {}

/**
 * Resolve the object that holds state shared across requests.
 *
 * @returns whatever useUnlighthouse() returns when it is truthy, otherwise the
 * module-level fallback object.
 */
function sharedContext() {
  return useUnlighthouse() || _sharedContext
}
131
+
132
+ export async function createAxiosInstance ( resolvedConfig : ResolvedUserConfig ) {
133
+ // try and resolve dns lookup issues
134
+ dns . setServers ( [
135
+ '8.8.8.8' , // Google
136
+ '1.1.1.1' , // Cloudflare
137
+ ] )
138
+ const resolver = new dns . Resolver ( )
139
+ resolver . setServers ( [
140
+ '8.8.8.8' , // Google
141
+ '1.1.1.1' , // Cloudflare
142
+ ] )
125
143
const axiosOptions : AxiosRequestConfig = { }
126
144
if ( resolvedConfig . auth )
127
145
axiosOptions . auth = resolvedConfig . auth
128
146
129
147
axiosOptions . headers = axiosOptions . headers || { }
148
+ // this should always be set
149
+ axiosOptions . headers [ 'User-Agent' ] = resolvedConfig . lighthouseOptions . emulatedUserAgent || 'Unlighthouse'
130
150
131
151
if ( resolvedConfig . cookies ) {
132
152
axiosOptions . headers . Cookie = resolvedConfig . cookies
@@ -148,44 +168,85 @@ export async function fetchUrlRaw(url: string, resolvedConfig: ResolvedUserConfi
148
168
149
169
axiosOptions . httpsAgent = new https . Agent ( {
150
170
rejectUnauthorized : false ,
171
+ keepAlive : true ,
172
+ timeout : 30_000 ,
173
+ } )
174
+ axiosOptions . httpAgent = new http . Agent ( {
175
+ keepAlive : true ,
176
+ timeout : 30_000 ,
151
177
} )
178
+ axiosOptions . proxy = false
179
+ axiosOptions . timeout = 30_000
152
180
axiosOptions . withCredentials = true
153
- try {
154
- const response = await axios . get ( url , axiosOptions )
155
- let responseUrl = response . request . res . responseUrl
156
- if ( responseUrl && axiosOptions . auth ) {
157
- // remove auth credentials from url (e.g. https://user:[email protected] )
158
- responseUrl = responseUrl . replace ( / (?< = h t t p s ? : \/ \/ ) ( .+ ?@ ) / g, '' )
159
- }
160
- const redirected = responseUrl && responseUrl !== url
161
- const redirectUrl = responseUrl
162
- if ( response . status < 200 || ( response . status >= 300 && ! redirected ) ) {
181
+ const unlighthouse = sharedContext ( )
182
+ unlighthouse . _axios = axios . create ( axiosOptions )
183
+ return unlighthouse . _axios
184
+ }
185
+
186
/**
 * Request a URL with the shared axios instance and report whether it resolved
 * to a usable response.
 *
 * The request is attempted up to three times; only `ETIMEDOUT` and
 * `ENETUNREACH` failures are retried, every other failure is returned
 * immediately. This function does not throw for request failures: they are
 * logged and returned in the `error` field.
 *
 * @param url - The absolute URL to request.
 * @param resolvedConfig - Resolved user config; used to build the axios
 * instance when none is cached, and `auth` decides whether credentials are
 * stripped from the final URL.
 * @returns `valid: true` for a 2xx response, or for a non-2xx response that
 * was redirected; otherwise `valid: false`, with `error` set when the request
 * itself failed.
 */
export async function fetchUrlRaw(url: string, resolvedConfig: ResolvedUserConfig): Promise<{ error?: any, redirected?: boolean, redirectUrl?: string, valid: boolean, response?: AxiosResponse }> {
  const logger = useLogger()
  const unlighthouse = sharedContext()
  // reuse the cached client when present; createAxiosInstance stores the one it builds
  const instance: AxiosInstance = unlighthouse._axios || await createAxiosInstance(resolvedConfig)
  const maxRetries = 3
  // last retryable failure, kept so the final error can say why we gave up
  let lastError: any

  for (let attempt = 1; attempt <= maxRetries; attempt++) {
    try {
      const response = await instance.get(url, { timeout: 30_000 })
      // NOTE(review): `request.res.responseUrl` is presumably populated by the Node http
      // adapter; it is read defensively so a successful response is not reported as a
      // failure when it is absent.
      let responseUrl = response.request?.res?.responseUrl
      if (responseUrl && resolvedConfig.auth) {
        // remove auth credentials from url (e.g. https://user:pass@example.com)
        responseUrl = responseUrl.replace(/(?<=https?:\/\/)(.+?@)/g, '')
      }
      const redirected = Boolean(responseUrl) && responseUrl !== url
      const redirectUrl = responseUrl
      // valid when the status is 2xx, or when a non-2xx status came from a redirected request
      const valid = response.status >= 200 && (response.status < 300 || redirected)
      return {
        valid,
        redirected,
        response,
        redirectUrl,
      }
    }
    catch (e: any) {
      if (e.errors) {
        logger.error('Axios error:', e.errors)
      }
      logger.error('Axios error message:', e.message)
      logger.error('Axios error code:', e.code)
      if (e.response) {
        logger.error('Axios error response data:', e.response.data)
        logger.error('Axios error response status:', e.response.status)
        logger.error('Axios error response headers:', e.response.headers)
      }
      if (e.code === 'ETIMEDOUT' || e.code === 'ENETUNREACH') {
        lastError = e
        // only announce a retry when another attempt will actually happen
        if (attempt < maxRetries)
          logger.info(`Retrying request... (${attempt}/${maxRetries})`)
        continue
      }
      return {
        error: e,
        valid: false,
      }
    }
  }
  return {
    // keep the original message, but expose the underlying network error to callers
    error: Object.assign(new Error('Max retries reached'), { cause: lastError }),
    valid: false,
  }
}
184
245
185
246
/**
 * Convert a rule into a RegExp.
 *
 * @param rule - A RegExp, returned unchanged, or a string pattern.
 * @returns The given RegExp, or a new unanchored RegExp built from the string
 * after escaping `-{}()+?.,\^|#` and whitespace.
 */
export function asRegExp(rule: string | RegExp): RegExp {
  if (rule instanceof RegExp)
    return rule
  // need to escape the string for use in a RegExp but allow basic path characters like /
  const escaped = rule.replace(/[-{}()+?.,\\^|#\s]/g, '\\$&')
  return new RegExp(escaped)
}
0 commit comments