@@ -693,14 +693,18 @@ def _download_file(
693
693
# TODO: how do we discover the total size????
694
694
# TODO: do not do it in-place, but rather into some "hidden" file
695
695
resuming = False
696
- for attempt in range (3 ):
696
+ attempt = 0
697
+ nattempts = 3 # number to do, could be incremented if we downloaded a little
698
+ while attempt <= nattempts :
699
+ attempt += 1
697
700
try :
698
701
if digester :
699
702
downloaded_digest = digester () # start empty
700
703
warned = False
701
704
# I wonder if we could make writing async with downloader
702
705
with DownloadDirectory (path , digests or {}) as dldir :
703
706
assert dldir .offset is not None
707
+ downloaded_in_attempt = 0
704
708
downloaded = dldir .offset
705
709
resuming = downloaded > 0
706
710
if size is not None and downloaded == size :
@@ -719,6 +723,7 @@ def _download_file(
719
723
assert downloaded_digest is not None
720
724
downloaded_digest .update (block )
721
725
downloaded += len (block )
726
+ downloaded_in_attempt += len (block )
722
727
# TODO: yield progress etc
723
728
out : dict [str , Any ] = {"done" : downloaded }
724
729
if size :
@@ -744,30 +749,83 @@ def _download_file(
744
749
# Catching RequestException lets us retry on timeout & connection
745
750
# errors (among others) in addition to HTTP status errors.
746
751
except requests .RequestException as exc :
752
+ sleep_amount = random .random () * 5 * attempt
753
+ if os .environ .get ("DANDI_DEVEL_AGGRESSIVE_RETRY" ):
754
+ # in such a case if we downloaded a little more --
755
+ # consider it a successful attempt
756
+ if downloaded_in_attempt > 0 :
757
+ lgr .debug (
758
+ "%s - download failed on attempt #%d: %s, "
759
+ "but did download %d bytes, so considering "
760
+ "it a success and incrementing number of allowed attempts." ,
761
+ path ,
762
+ attempt ,
763
+ exc ,
764
+ downloaded_in_attempt ,
765
+ )
766
+ nattempts += 1
747
767
# TODO: actually we should probably retry only on selected codes,
748
- # and also respect Retry-After
749
- if attempt >= 2 or (
750
- exc .response is not None
751
- and exc .response .status_code
752
- not in (
768
+ if exc .response is not None :
769
+ if exc .response .status_code not in (
753
770
400 , # Bad Request, but happened with gider:
754
771
# https://github.com/dandi/dandi-cli/issues/87
755
772
* RETRY_STATUSES ,
773
+ ):
774
+ lgr .debug (
775
+ "%s - download failed due to response %d: %s" ,
776
+ path ,
777
+ exc .response .status_code ,
778
+ exc ,
779
+ )
780
+ yield {"status" : "error" , "message" : str (exc )}
781
+ return
782
+ elif retry_after := exc .response .headers .get ("Retry-After" ):
783
+ # playing safe
784
+ if not str (retry_after ).isdigit ():
785
+ # our code is wrong, do not crash but issue warning so
786
+ # we might get report/fix it up
787
+ lgr .warning (
788
+ "%s - download failed due to response %d with non-integer"
789
+ " Retry-After=%r: %s" ,
790
+ path ,
791
+ exc .response .status_code ,
792
+ retry_after ,
793
+ exc ,
794
+ )
795
+ yield {"status" : "error" , "message" : str (exc )}
796
+ return
797
+ sleep_amount = int (retry_after )
798
+ lgr .debug (
799
+ "%s - download failed due to response %d with "
800
+ "Retry-After=%d: %s, will sleep and retry" ,
801
+ path ,
802
+ exc .response .status_code ,
803
+ sleep_amount ,
804
+ exc ,
805
+ )
806
+ else :
807
+ lgr .debug ("%s - download failed: %s" , path , exc )
808
+ yield {"status" : "error" , "message" : str (exc )}
809
+ return
810
+ elif attempt >= nattempts :
811
+ lgr .debug (
812
+ "%s - download failed after %d attempts: %s" , path , attempt , exc
756
813
)
757
- ):
758
- lgr .debug ("%s - download failed: %s" , path , exc )
759
814
yield {"status" : "error" , "message" : str (exc )}
760
815
return
761
816
# if is_access_denied(exc) or attempt >= 2:
762
817
# raise
763
818
# sleep a little and retry
764
- lgr .debug (
765
- "%s - failed to download on attempt #%d: %s, will sleep a bit and retry" ,
766
- path ,
767
- attempt ,
768
- exc ,
769
- )
770
- time .sleep (random .random () * 5 )
819
+ else :
820
+ lgr .debug (
821
+ "%s - download failed on attempt #%d: %s, will sleep a bit and retry" ,
822
+ path ,
823
+ attempt ,
824
+ exc ,
825
+ )
826
+ time .sleep (sleep_amount )
827
+ else :
828
+ lgr .warning ("downloader logic: We should not be here!" )
771
829
772
830
if downloaded_digest and not resuming :
773
831
assert downloaded_digest is not None
0 commit comments