# -*- coding: utf-8 -*-
import sys
import tokenize


class CouldNotHandleEncoding(Exception):
    """Raised when a source file's encoding cannot be detected or decoded.

    Attributes:
        path: path of the file that could not be read.
        cause: the underlying exception that triggered this error.
    """

    def __init__(self, path, cause):
        # Forward the arguments to Exception so that ``args`` is populated:
        # this gives a non-empty str()/repr() and lets copy/pickle rebuild
        # the exception (they re-invoke the constructor with ``args``).
        super(CouldNotHandleEncoding, self).__init__(path, cause)
        self.path = path
        self.cause = cause


def read_py_file(filepath):
    """Return the text of the Python source file at ``filepath``.

    On Python 2 the file is read in universal-newline mode and returned
    without any explicit decoding.

    On Python 3 the file is opened with ``tokenize.open``, which detects
    the encoding from a BOM and/or a PEP 263 coding declaration (falling
    back to UTF-8), and the decoded text is returned.
    See https://docs.python.org/3/library/tokenize.html#tokenize.detect_encoding

    Raises:
        CouldNotHandleEncoding: (Python 3 only) when the encoding cannot be
            determined or the contents cannot be decoded with it. The
            original ``SyntaxError`` or ``UnicodeDecodeError`` is passed
            along as the exception's cause.
    """
    if sys.version_info < (3, ):
        # Use a context manager so the handle is closed deterministically.
        with open(filepath, 'rU') as source:
            return source.read()

    # tokenize.open() runs tokenize.detect_encoding() itself, so a separate
    # detection pass beforehand would only open and sniff the file twice.
    try:
        with tokenize.open(filepath) as source:
            return source.read()
    except SyntaxError as err:
        # Raised by encoding detection:
        #   (1) in badly authored files (contains non-utf8 in a comment line)
        #   (2) a coding is specified, but wrong and
        #   (3) no coding is specified, and the default
        #       'utf8' fails to decode.
        #   (4) the encoding specified by a pep263 declaration did not match
        #       with the encoding detected by inspecting the BOM
        raise CouldNotHandleEncoding(filepath, err)
    except UnicodeDecodeError as err:
        # Raised while decoding the body:
        #   (1) if utf-8 is specified, but latin1 is used with something
        #       like \xe9 appearing (see http://stackoverflow.com/a/5552623)
        raise CouldNotHandleEncoding(filepath, err)