from bs4 import BeautifulSoup
import requests
import re
from Crypto.Cipher import AES
import os
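

# Overview of the workflow implemented below:
#   searchVideo()  -> search the target site and list matching shows
#   jiexiHtml()    -> collect the per-episode page links into <videoName>.txt
#   getM3u8()      -> pull the m3u8 playlist URL (or segment list) out of the page scripts
#   tsList()       -> store the decryption key and every .ts segment URL in m3u8.txt
#   Download()     -> fetch each segment, decrypt it if needed, and append it to an .mp4 file
# Progress is tracked in two helper files: index.txt (segment index within the
# current episode) and jishu.txt (episode counter), so an interrupted run can resume.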
def tsList(Index):
    # Skip the network work if the segment list has already been written.
    with open(os.getcwd() + '/m3u8.txt', 'r') as f:
        if '.ts' in f.read():
            print('ts segment links are already stored, no need to request them again')
        else:
            print('Start fetching and storing the ts links')
            with open(os.getcwd() + '/m3u8.txt', 'r') as f:
                m3u8Url = f.readlines()[0].strip()
            content = requests.get(m3u8Url, headers=headers)
            # '#EXT-X-KEY:' is only present when the stream is AES encrypted.
            jiami = re.findall('#EXT-X-KEY:(.*)\n', content.text)
            m3u8Url_before = ''
            if len(jiami) > 0:
                key = str(re.findall('URI="(.*)"', jiami[0]))[2:-2]
                if 'http' not in key:
                    # Relative key URI: rebuild the host part from the playlist URL.
                    m3u8Start = m3u8Url.find("\"url\":\"") + 7
                    m3u8End = m3u8Url.find(".m3u8") + 5
                    m3u8Url = m3u8Url[m3u8Start:m3u8End].replace('\\', '')
                    m3u8Url_before = "https://" + m3u8Url.split('/')[2]
                else:
                    m3u8Url_before = ''
                keycontent = requests.get(m3u8Url_before + key, headers=headers).text
                with open(os.getcwd() + '/m3u8.txt', 'a') as f:
                    f.write(keycontent + '\n')
            else:
                # Unencrypted stream: store a placeholder instead of a key.
                with open(os.getcwd() + '/m3u8.txt', 'a') as f:
                    f.write('000000000000')
            if content.status_code == 200:
                pattern = re.compile(r'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+')
                content = content.text.split(",")
                index = 0
                for item in content:
                    if 'http' not in item:
                        index += 1
                        # Relative segment path: prepend the host extracted above.
                        temp = (m3u8Url_before + item).replace("\n", "")
                        url = str(pattern.findall(temp))[2:-2]
                        with open(os.getcwd() + '/m3u8.txt', 'a') as f:
                            f.write(url + '\n')
                    else:
                        index += 1
                        url = str(pattern.findall(item))[2:-2]
                        with open(os.getcwd() + '/m3u8.txt', 'a') as f:
                            f.write(url + '\n')
    Download(Index)
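

# Layout of m3u8.txt as written by getM3u8()/tsList():
#   line 1: the resolved m3u8 playlist URL (or the 'hello~~' placeholder)
#   line 2: the AES key content, or '000000000000' when the stream is not encrypted
#   line 3+: one .ts segment URL per line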
def Download(Index):
    # Output file name for the current episode.
    now = videoName + "_Episode_" + str(Index + 1)
    index = 0
    try:
        # index.txt remembers how many segments have already been appended.
        with open(os.getcwd() + '/index.txt', 'r') as f3:
            index = int(float(f3.read()))
    except FileNotFoundError as e:
        index = 4
    index1 = index
    with open(os.getcwd() + '/m3u8.txt', 'r') as getKey:
        # Line 2 of m3u8.txt holds the AES key (or the '000000000000' placeholder).
        keycontent = getKey.readlines()[1][0:-1]
    print(keycontent)
    if keycontent == '000000000000':
        print("Not encrypted")
    else:
        # AES-128-CBC; the key bytes are reused as the IV here.
        cryptor = AES.new(keycontent.encode('utf-8'), AES.MODE_CBC, keycontent.encode('utf-8'))
    with open(os.getcwd() + '/m3u8.txt', 'r') as getTsUrlList:
        tsList = getTsUrlList.readlines()[index:]
    tsListlen = len(tsList)
    for i in tsList[1:]:
        print("Estimated progress: " + str(index1 - 3) + '/' + str(tsListlen - 1) + "  about to download: " + i)
        res = ''
        # Retry each segment up to 9 times on timeout.
        for item in range(1, 10):
            try:
                response = requests.get(i, headers=headers, timeout=3)
            except Exception as e:
                print(i[-12:-1] + ' request timed out, retrying (attempt ' + str(item) + ')')
                continue
            if response.status_code == 200:
                print(i[-12:-1] + ' request succeeded')
                res = response
                if keycontent == '000000000000':
                    print('Not encrypted, appending directly')
                    cont = res.content
                else:
                    try:
                        cont = cryptor.decrypt(res.content)
                    except Exception:
                        pass
                # Append the (decrypted) segment to the episode file.
                with open(os.getcwd() + '/' + now + '.mp4', 'ab+') as f:
                    f.write(cont)
                index1 += 1
                print("Segment appended, progress marked")
                with open(os.getcwd() + '/index.txt', 'w') as f:
                    f.write(str(index1))
                break
            else:
                continue
    # Episode finished: bump the episode counter and reset the per-episode state.
    with open(os.getcwd() + '/jishu.txt', 'w') as indexFile:
        indexFile.write(str(Index + 1) + ' \n')
    with open(os.getcwd() + '/m3u8.txt', 'a') as indexFile:
        indexFile.truncate(0)
    with open(os.getcwd() + '/index.txt', 'w') as f:
        f.write('0')
    return True
def getM3u8(htmlUrl):
    content = requests.get(htmlUrl, headers=headers).text
    bsObj = BeautifulSoup(content, "html.parser")
    index = 0
    # Scan every <script> block for an m3u8 link.
    for scriptItem in bsObj.findAll("script"):
        index += 1
        if '.m3u8' in str(scriptItem):
            pattern = re.compile(r'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+')
            m3u8Url = pattern.search(str(scriptItem)).group()
            global m3u8Url_before
            m3u8Url_before = getDomain(m3u8Url)
            if 'hls' not in m3u8Url:
                # Master playlist: the actual hls playlist URL sits on the second-to-last line.
                content = requests.get(m3u8Url, headers=headers).text
                count = 0
                nList = []
                for item in list(content):
                    count += 1
                    if item == '\n':
                        nList.append(count)
                m3u8Url_hls = str(content)[nList[-2]:nList[-1]]
                if 'http' not in m3u8Url_hls:
                    m3u8Url_hls = m3u8Url_before + m3u8Url_hls
            else:
                m3u8Url_hls = m3u8Url
            if 'ENDLIST' not in m3u8Url_hls:
                # Normal case: store the playlist URL for tsList() to expand.
                with open(os.getcwd() + '/m3u8.txt', 'w') as f:
                    f.write(m3u8Url_hls)
                return 1
            else:
                # The playlist already contains the segment list: write it out directly.
                content = requests.get(m3u8Url, headers=headers).text.split(",")
                with open(os.getcwd() + '/m3u8.txt', 'a+') as f:
                    f.write('hello~~' + '\n')
                with open(os.getcwd() + '/m3u8.txt', 'a+') as f:
                    f.write('000000000000\n')
                for i in content:
                    if '/' in i:
                        breakpoint = i[1:].index('\n') + 1
                        with open(os.getcwd() + '/m3u8.txt', 'a+') as f:
                            f.write(i[1:breakpoint] + '\n')
                return 2
        else:
            if index == len(list(bsObj.findAll("script"))):
                print("No m3u8 link found in the page scripts...")
                return 3
def jiexiHtml(htmlUrl):
    content = requests.get(htmlUrl, headers=headers).text
    bsObj = BeautifulSoup(content, "html.parser")
    global videoName
    videoName = bsObj.find('h1', {'class': 'page-title'}).text
    # Collect every episode link under the episode list container.
    videoItemHtml = bsObj.find('div', {'class': 'scroll-content'}).findAll('a', href=re.compile("^(/ShowInfo/)((?!:).)*$"))
    with open(os.getcwd() + '/' + videoName + '.txt', 'w') as indexFile:
        indexFile.truncate(0)
    for i in videoItemHtml:
        item = str(i.attrs['href'])
        if 'http' not in item:
            # Relative link: prepend the site domain.
            Domain = getDomain(htmlUrl)
            with open(os.getcwd() + '/' + videoName + '.txt', 'a') as indexFile:
                indexFile.write(Domain + item + '\n')
        else:
            with open(os.getcwd() + '/' + videoName + '.txt', 'a') as indexFile:
                indexFile.write(item + '\n')
    print("indexFile updated")
    # Make sure the episode counter file exists.
    try:
        with open(os.getcwd() + '/jishu.txt', 'r') as indexFile:
            pass
    except FileNotFoundError as e:
        print("jishu.txt does not exist; created and reset to zero")
        with open(os.getcwd() + '/jishu.txt', 'a') as indexFile:
            indexFile.write('0 \n')
    with open(os.getcwd() + '/' + videoName + '.txt', 'r') as indexFile:
        HtmlArr = indexFile.readlines()
    with open(os.getcwd() + '/jishu.txt', 'r') as indexFile:
        jishu = int(indexFile.read())
    print("Episode count recorded in jishu.txt: " + str(jishu))
    # Resume from the recorded episode and process the rest one by one.
    for htmlItem in HtmlArr[jishu:]:
        with open(os.getcwd() + '/jishu.txt', 'r') as indexFile:
            jishuRun = int(indexFile.read())
        print("Episode count recorded in jishu.txt: " + str(jishuRun))
        openHtml(htmlItem, jishuRun)
def openHtml(htmlItem, jishu):
    print("Opening link: " + htmlItem)
    pd = getM3u8(htmlItem)
    if pd == 1:
        # Got a playlist URL: expand it into ts links, then download.
        tsList(jishu)
    elif pd == 2:
        # Segment list was written directly: download straight away.
        if Download(jishu):
            return True
    else:
        pass
def getDomain(htmlUrl):
    # Return the scheme + host part of a URL, e.g. 'https://example.com'.
    pattern = re.compile(r'http[s]?://[a-zA-Z\-.0-9]+(?=\/)')
    return str(pattern.search(htmlUrl).group())
def searchVideo(searchName):
    # '你找的影院地址' is the author's placeholder for the target site's search URL.
    content = requests.get('你找的影院地址', headers=headers, params={'searchword': searchName})
    bsObj = BeautifulSoup(content.text, "html.parser")
    VideoList = bsObj.findAll("a", {"class": "module-item-pic"})
    if VideoList:
        htmlUrlList = VideoList
        return htmlUrlList
    else:
        return False
if __name__ == '__main__':
    searchName = input('Enter the name of the show to search for: ')
    # headers is read by every request above; fill in e.g. a User-Agent here.
    headers = {}
    htmlUrlList = searchVideo(searchName)
    if htmlUrlList:
        li = 0
        for i in htmlUrlList:
            li += 1
            title = i.find('img', {"class": "lazyload"}).attrs['alt']
            print(str(li) + '. ' + title + '\n')
    else:
        print("No matching videos found")
        try:
            raise RuntimeError('testError')
        except RuntimeError as e:
            print("Program is about to exit")
            os._exit(0)
    liNo = input('Enter the number of the video to download: ')
    if htmlUrlList[int(liNo) - 1]:
        htmlUrl = htmlUrlList[int(liNo) - 1].attrs['href']
        # Create a working directory named after the search term and switch into it.
        if os.path.exists(os.getcwd() + '/' + searchName):
            pass
        else:
            os.makedirs(os.getcwd() + '/' + searchName)
        os.chdir(os.getcwd() + '/' + searchName)
        jiexiHtml('你找的影院地址' + htmlUrl)  # placeholder site address + episode page path
    else:
        print("Number out of range")