码迷,mamicode.com
首页 > 编程语言 > 详细

Python之基于十六进制判断文件类型

时间:2019-11-23 18:31:00      阅读:75      评论:0      收藏:0      [点我收藏+]

标签:usr   ict   pack   color   基于   break   import   ==   splay   

核心代码:

技术图片
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Author  : suk
import struct
from io import BytesIO


# 支持文件类型
# 用16进制字符串的目的是可以知道文件头是多少字节
# 各种文件头的长度不一样,少则2字符,长则8字符
def typeList(types):
    """Return the known file-header signatures for the requested types.

    Each signature is a string of upper-case hexadecimal digits describing the
    leading bytes of a file (two hex digits per byte), so the number of bytes
    to compare is ``len(signature) // 2``.

    :param types: container of type names (e.g. ``["jpg", "png"]``); only
        names present in the built-in table are returned.
    :return: dict mapping each recognised type name to its list of hex
        signatures, in the table's own order (not the order of ``types``).
    """
    # The table is rebuilt on every call so callers receive fresh lists they
    # may freely mutate.
    type_dict = {
        "jpg": ["FFD8FFE000104A464946"],
        "png": ["89504E470D0A1A0A0000"],
        "gif": ["47494638396126026F01"],
        "tif": ["49492A00227105008037"],
        "bmp": ["424D8E1B030000000000"],
        "dwg": ["41433130313500000000"],
        "html": ["3C21444F435459504520"],
        "htm": ["3C21646F637479706520"],
        "css": ["48544D4C207B0D0A0942"],
        "js": ["696B2E71623D696B2E71"],
        "rtf": ["7B5C727466315C616E73"],
        "psd": ["38425053000100000000"],
        "eml": ["46726F6D3A203D3F6762"],
        "wps": ["D0CF11E0A1B11AE10000"],
        "mdb": ["5374616E64617264204A"],
        "ps": ["252150532D41646F6265"],
        "pdf": ["255044462D312E"],
        "rmvb": ["2E524D46000000120001"],
        "flv": ["464C5601050000000900"],
        "mp4": ["00000020667479706D70"],
        "mp3": ["49443303000000002176"],
        "mpg": ["000001BA210001000180"],
        "wmv": ["3026B2758E66CF11A6D9"],
        "wav": ["52494646E27807005741"],
        "avi": ["52494646D07D60074156"],
        "mid": ["4D546864000000060001"],
        "zip": [
            "504B0304140000000800",
            "504B0304140000080800",
            "504B03040A0000080000",
        ],
        "rar": ["526172211A0700CF9073"],
        "ini": ["235468697320636F6E66"],
        "jar": ["504B03040A0000000000"],
        "exe": ["4D5A9000030000000400"],
        "jsp": ["3C25402070616765206C"],
        "mf": ["4D616E69666573742D56"],
        "xml": ["3C3F786D6C2076657273"],
        "sql": ["494E5345525420494E54"],
        "java": ["7061636B616765207765"],
        "bat": ["406563686F206F66660D"],
        "gz": ["1F8B0800000000000000"],
        "properties": ["6C6F67346A2E726F6F74"],
        "class": ["CAFEBABE0000002E0041"],
        "chm": ["49545346030000006000"],
        "mxp": ["04000000010000001300"],
        "docx": ["504B0304140006000800", "504B03040A0000000000"],
        "torrent": ["6431303A637265617465"],
        "mov": ["6D6F6F76"],
        "wpd": ["FF575043"],
        "dbx": ["CFAD12FEC5FD746F"],
        "pst": ["2142444E"],
        "qdf": ["AC9EBD8F"],
        "pwl": ["E3828596"],
        "ram": ["2E7261FD"],
    }
    return {
        type_name: signatures
        for type_name, signatures in type_dict.items()
        if type_name in types
    }


# 字节码转16进制字符串
def bytes2hex(bytes):
    """Render a sequence of integers as one upper-case hexadecimal string.

    Every value is formatted with ``%x``; a leading ``0`` is added whenever
    the formatted text has an odd number of characters, so a value in the
    range 0-255 always contributes exactly two hex digits.

    :param bytes: sequence of integers (e.g. the tuple returned by
        ``struct.unpack_from`` with ``"B"`` formats).
    :return: the concatenated, upper-cased hex text.
    """
    pieces = []
    for value in bytes:
        digits = u"%x" % value
        # Pad to an even number of characters, e.g. "a" -> "0a".
        pieces.append(u"0" + digits if len(digits) % 2 else digits)
    return u"".join(pieces).upper()


# 获取文件类型
def file_type(filename, types=None):
    """Detect a file's type by comparing its leading bytes to known signatures.

    :param filename: path of the file to inspect.
    :param types: type names to test against; defaults to
        ``["jpg", "zip", "docx"]`` when omitted.
    :return: the first type name with a signature equal to the file's leading
        bytes, or ``None`` when nothing matches.
    """
    if types is None:
        types = ["jpg", "zip", "docx"]
    candidates = typeList(types=types)

    # The file must be read in binary mode; ``with`` guarantees the handle is
    # closed even if reading raises.
    with open(filename, "rb") as binfile:
        for type_name, hcode_list in candidates.items():
            for hcode in hcode_list:
                num_bytes = len(hcode) // 2  # two hex digits per byte
                binfile.seek(0)  # every comparison starts at the file head
                header = binfile.read(num_bytes)
                if len(header) < num_bytes:
                    # File is shorter than this signature, so it cannot match
                    # (and struct.unpack_from would raise struct.error).
                    continue
                # One "B" unpacks one unsigned byte.
                values = struct.unpack_from("B" * num_bytes, header)
                # If a file is not recognised, print bytes2hex(values) here
                # and add that value to the signature table.
                if bytes2hex(values) == hcode:
                    return type_name
    return None


# 获取字节流类型
def stream_type(stream, types):
    """Detect the type of in-memory data from its leading bytes.

    :param stream: the data to inspect; it is sliced from index 0, so it must
        be a bytes-like sequence holding the start of the content.
    :param types: type names to test against, e.g. ``["jpg", "png"]``.
    :return: the first type name with a signature equal to the leading bytes
        of ``stream``, or ``None`` when nothing matches.
    """
    for type_name, hcode_list in typeList(types=types).items():
        for hcode in hcode_list:
            num_bytes = len(hcode) // 2  # two hex digits per byte
            head = stream[0:num_bytes]
            if len(head) < num_bytes:
                # Data is shorter than this signature (e.g. an empty upload),
                # so it cannot match; struct.unpack_from would raise
                # struct.error on it.
                continue
            # One "B" unpacks one unsigned byte.
            values = struct.unpack_from("B" * num_bytes, head)
            # If the data is not recognised, print bytes2hex(values) here and
            # add that value to the signature table.
            if bytes2hex(values) == hcode:
                return type_name
    return None


def stream_split(stream, count=3):
    """Join the first ``count`` blocks of a chunked stream into one bytes value.

    Intended for data that arrives in pieces: enough leading blocks are glued
    together for the type check to inspect.

    :param stream: iterable yielding bytes-like blocks.
    :param count: number of leading blocks to combine (default 3). Iteration
        stops right after the ``count``-th block; if that position is never
        reached, every block of the stream is consumed.
    :return: the concatenated blocks as ``bytes``.
    """
    chunks = []
    for position, chunk in enumerate(stream, 1):
        chunks.append(chunk)
        if position == count:
            break
    return b"".join(chunks)
is_file_type.py
type_dict 字典中的文件头签名数据来自互联网,可根据自己需要上传的文件类型自行增删。

 

基于Flask的上传示例

@index.route('/upload', methods=['GET', 'POST'])
def upload():
    """Show the upload form (GET) or validate and store an uploaded file (POST).

    The upload is accepted only when its leading bytes match a "jpg", "png"
    or "pdf" signature; accepted files are saved under the ``files`` directory.

    :return: the rendered form for GET, otherwise a status message string.
    """
    if request.method == 'GET':
        return render_template('upload.html')

    upload_obj = request.files.get('code_file')

    if not upload_obj:
        return '没有选择文件上传'

    ret = stream_type(stream_split(upload_obj.stream), ["jpg", "png", "pdf"])

    if not ret:
        return '上传失败,文件类型不匹配,类型必须 "jpg" or "png" or "pdf"'

    # stream_split() consumed the leading blocks of the stream; rewind so that
    # save() writes the file from its first byte instead of from mid-stream.
    upload_obj.stream.seek(0)

    # The filename is supplied by the client: keep only its last path
    # component so it cannot escape the target directory.
    # NOTE(review): werkzeug.utils.secure_filename is the stricter option if
    # Werkzeug can be imported here.
    file_name = os.path.basename(upload_obj.filename.replace('\\', '/'))
    if not file_name or file_name in ('.', '..'):
        return '没有选择文件上传'
    upload_obj.save(os.path.join('files', file_name))

    return '上传文件成功'

upload.html

{% extends 'layout.html' %}
{% block content %}
<h1>上传代码</h1>
<form action="" method="post" enctype="multipart/form-data">
    <input type="file" name="code_file">
    <input type="submit" value="上传">
</form>
{% endblock %}

 

开始上传文件:

上传不在列表中的文件类型

技术图片

 

 


上传在列表中的文件类型

技术图片

 

技术图片

 

技术图片

 

Python之基于十六进制判断文件类型

标签:usr   ict   pack   color   基于   break   import   ==   splay   

原文地址:https://www.cnblogs.com/ygbh/p/11918876.html

(0)
(0)
   
举报
评论 一句话评论(0
登录后才能评论!
© 2014 mamicode.com 版权所有  联系我们:gaon5@hotmail.com
迷上了代码!