summaryrefslogtreecommitdiff
path: root/lib/git
diff options
context:
space:
mode:
Diffstat (limited to 'lib/git')
-rw-r--r--lib/git/cmd.py7
-rw-r--r--lib/git/objects/base.py4
-rw-r--r--lib/git/objects/commit.py42
-rw-r--r--lib/git/objects/utils.py25
-rw-r--r--lib/git/odb/utils.py32
-rw-r--r--lib/git/repo.py43
-rw-r--r--lib/git/utils.py15
7 files changed, 104 insertions, 64 deletions
diff --git a/lib/git/cmd.py b/lib/git/cmd.py
index aaa27adc..18d1c505 100644
--- a/lib/git/cmd.py
+++ b/lib/git/cmd.py
@@ -323,12 +323,7 @@ class Git(object):
stdout_value = proc.stdout.read().rstrip() # strip trailing "\n"
else:
max_chunk_size = 1024*64
- while True:
- chunk = proc.stdout.read(max_chunk_size)
- output_stream.write(chunk)
- if len(chunk) < max_chunk_size:
- break
- # END reading output stream
+ stream_copy(proc.stdout, output_stream, max_chunk_size)
stdout_value = output_stream
# END stdout handling
stderr_value = proc.stderr.read().rstrip() # strip trailing "\n"
diff --git a/lib/git/objects/base.py b/lib/git/objects/base.py
index 64a5678e..f7043199 100644
--- a/lib/git/objects/base.py
+++ b/lib/git/objects/base.py
@@ -125,8 +125,8 @@ class Object(LazyMixin):
Returns
File Object compatible stream to the uncompressed raw data of the object
"""
- sha, type, size, stream = self.repo.git.stream_object_data(self.sha)
- return stream
+ proc = self.repo.git.cat_file(self.type, self.sha, as_process=True)
+ return utils.ProcessStreamAdapter(proc, "stdout")
def stream_data(self, ostream):
"""
diff --git a/lib/git/objects/commit.py b/lib/git/objects/commit.py
index 98aca360..d56ce306 100644
--- a/lib/git/objects/commit.py
+++ b/lib/git/objects/commit.py
@@ -91,15 +91,6 @@ class Commit(base.Object, Iterable, diff.Diffable, utils.Traversable, utils.Seri
"""
super(Commit,self).__init__(repo, sha)
self._set_self_from_args_(locals())
-
- if parents is not None:
- cls = type(self)
- self.parents = tuple(cls(repo, p) for p in parents if not isinstance(p, cls))
- # END for each parent to convert
-
- if self.sha and tree is not None:
- self.tree = Tree(repo, tree, path='')
- # END id to tree conversion
@classmethod
def _get_intermediate_items(cls, commit):
@@ -350,7 +341,12 @@ class Commit(base.Object, Iterable, diff.Diffable, utils.Traversable, utils.Seri
committer, committer_time, committer_offset,
message, parent_commits, conf_encoding)
- # serialize !
+ stream = StringIO()
+ new_commit._serialize(stream)
+ streamlen = stream.tell()
+ stream.seek(0)
+
+ new_commit.sha = repo.odb.to_object(cls.type, streamlen, stream, sha_as_hex=True)
if head:
try:
@@ -377,8 +373,28 @@ class Commit(base.Object, Iterable, diff.Diffable, utils.Traversable, utils.Seri
#{ Serializable Implementation
def _serialize(self, stream):
- # for now, this is very inefficient and in fact shouldn't be used like this
- return super(Commit, self)._serialize(stream)
+ write = stream.write
+ write("tree %s\n" % self.tree)
+ for p in self.parents:
+ write("parent %s\n" % p)
+
+ a = self.author
+ c = self.committer
+ fmt = "%s %s <%s> %s %s\n"
+ write(fmt % ("author", a.name, a.email,
+ self.authored_date,
+ utils.altz_to_utctz_str(self.author_tz_offset)))
+
+ write(fmt % ("committer", c.name, c.email,
+ self.committed_date,
+ utils.altz_to_utctz_str(self.committer_tz_offset)))
+
+ if self.encoding != self.default_encoding:
+ write("encoding %s\n" % self.encoding)
+
+ write("\n")
+ write(self.message)
+ return self
def _deserialize(self, stream):
""":param from_rev_list: if true, the stream format is coming from the rev-list command
@@ -416,7 +432,7 @@ class Commit(base.Object, Iterable, diff.Diffable, utils.Traversable, utils.Seri
# a stream from our data simply gives us the plain message
# The end of our message stream is marked with a newline that we strip
- self.message = stream.read()[:-1]
+ self.message = stream.read()
return self
#} END serializable implementation
diff --git a/lib/git/objects/utils.py b/lib/git/objects/utils.py
index 6d378a72..c93f2091 100644
--- a/lib/git/objects/utils.py
+++ b/lib/git/objects/utils.py
@@ -16,7 +16,8 @@ import time
import os
__all__ = ('get_object_type_by_name', 'get_user_id', 'parse_date', 'parse_actor_and_date',
- 'ProcessStreamAdapter', 'Traversable')
+ 'ProcessStreamAdapter', 'Traversable', 'altz_to_utctz_str', 'utctz_to_altz',
+ 'verify_utctz')
def get_object_type_by_name(object_type_name):
"""
@@ -57,14 +58,24 @@ def get_user_id():
return "%s@%s" % (username, platform.node())
-def _utc_tz_to_altz(utctz):
def utctz_to_altz(utctz):
    """Convert a git UTC timezone string into an offset in seconds.

    The result follows the convention of ``time.altzone``: seconds *west* of
    UTC. Git stores the offset with the opposite sign, which explains the
    final negation.

    The input is in ``HHMM`` form rather than a decimal number, so hours and
    minutes are converted separately. Treating the value as hundredths of an
    hour would turn ``+0530`` into 5.3 hours instead of 5.5.

    :param utctz: git utc timezone string, i.e. +0200
    :return: offset in seconds as int, negative for zones east of UTC"""
    # int(float(...)) keeps accepting every input the previous implementation did
    value = int(float(utctz))
    hours, minutes = divmod(abs(value), 100)
    seconds = hours * 3600 + minutes * 60
    if value < 0:
        seconds = -seconds
    return -1 * seconds
+
def altz_to_utctz_str(altz):
    """Convert an offset in seconds west of UTC into a git timezone string.

    This inverts the conversion from a git ``+HHMM`` string to a
    ``time.altzone`` style offset, returning a string that can be used in
    commit objects.

    Hours and minutes are derived separately so that offsets which are not a
    whole number of hours (e.g. 5h30m) are rendered correctly. Any sub-minute
    remainder is dropped.

    :param altz: timezone offset in seconds, positive west of UTC
    :return: string in the form ``+HHMM`` or ``-HHMM``"""
    hours, minutes = divmod(abs(int(altz)) // 60, 60)
    # altz has the opposite sign of the git offset; anything that truncates to
    # zero minutes is reported as +0000
    sign = "-" if altz >= 60 else "+"
    return "%s%02d%02d" % (sign, hours, minutes)
+
-def _verify_utctz(offset):
+def verify_utctz(offset):
""":raise ValueError: if offset is incorrect
:return: offset"""
fmt_exc = ValueError("Invalid timezone offset format: %s" % offset)
@@ -97,11 +108,11 @@ def parse_date(string_date):
if string_date.count(' ') == 1 and string_date.rfind(':') == -1:
timestamp, offset = string_date.split()
timestamp = int(timestamp)
- return timestamp, _utc_tz_to_altz(_verify_utctz(offset))
+ return timestamp, utctz_to_altz(verify_utctz(offset))
else:
offset = "+0000" # local time by default
if string_date[-5] in '-+':
- offset = _verify_utctz(string_date[-5:])
+ offset = verify_utctz(string_date[-5:])
string_date = string_date[:-6] # skip space as well
# END split timezone info
@@ -139,7 +150,7 @@ def parse_date(string_date):
fstruct = time.struct_time((dtstruct.tm_year, dtstruct.tm_mon, dtstruct.tm_mday,
tstruct.tm_hour, tstruct.tm_min, tstruct.tm_sec,
dtstruct.tm_wday, dtstruct.tm_yday, tstruct.tm_isdst))
- return int(time.mktime(fstruct)), _utc_tz_to_altz(offset)
+ return int(time.mktime(fstruct)), utctz_to_altz(offset)
except ValueError:
continue
# END exception handling
@@ -167,7 +178,7 @@ def parse_actor_and_date(line):
"""
m = _re_actor_epoch.search(line)
actor, epoch, offset = m.groups()
- return (Actor._from_string(actor), int(epoch), _utc_tz_to_altz(offset))
+ return (Actor._from_string(actor), int(epoch), utctz_to_altz(offset))
diff --git a/lib/git/odb/utils.py b/lib/git/odb/utils.py
index 94d1cea8..fd340962 100644
--- a/lib/git/odb/utils.py
+++ b/lib/git/odb/utils.py
@@ -137,7 +137,7 @@ class DecompressMemMapReader(object):
# END handle size
# read header
- maxb = 8192
+ maxb = 512 # should really be enough, cgit uses 8192 I believe
self._s = maxb
hdr = self.read(maxb)
hdrend = hdr.find("\0")
@@ -172,20 +172,24 @@ class DecompressMemMapReader(object):
# Our performance now depends on StringIO. This way we don't need two large
# buffers in peak times, but only one large one in the end which is
# the return buffer
- if size > self.max_read_size:
- sio = StringIO()
- while size:
- read_size = min(self.max_read_size, size)
- data = self.read(read_size)
- sio.write(data)
- size -= len(data)
- if len(data) < read_size:
- break
- # END data loop
- sio.seek(0)
- return sio.getvalue()
- # END handle maxread
+ # NO: We don't do it - if the user thinks it's best, he is right. If he
+ # has trouble, he will start reading in chunks. According to our tests
+ # it's still faster if we read 10 Mb at once instead of chunking it.
+ # if size > self.max_read_size:
+ # sio = StringIO()
+ # while size:
+ # read_size = min(self.max_read_size, size)
+ # data = self.read(read_size)
+ # sio.write(data)
+ # size -= len(data)
+ # if len(data) < read_size:
+ # break
+ # # END data loop
+ # sio.seek(0)
+ # return sio.getvalue()
+ # # END handle maxread
+ #
# deplete the buffer, then just continue using the decompress object
# which has an own buffer. We just need this to transparently parse the
# header from the zlib stream
diff --git a/lib/git/repo.py b/lib/git/repo.py
index f4caa3fb..0bd2249c 100644
--- a/lib/git/repo.py
+++ b/lib/git/repo.py
@@ -4,12 +4,6 @@
# This module is part of GitPython and is released under
# the BSD License: http://www.opensource.org/licenses/bsd-license.php
-import os
-import sys
-import re
-import gzip
-import StringIO
-
from errors import InvalidGitRepositoryError, NoSuchPathError
from cmd import Git
from actor import Actor
@@ -19,6 +13,15 @@ from objects import *
from config import GitConfigParser
from remote import Remote
+from odb.db import LooseObjectDB
+
+import os
+import sys
+import re
+import gzip
+import StringIO
+
+
def touch(filename):
fp = open(filename, "a")
fp.close()
@@ -53,7 +56,7 @@ class Repo(object):
'git_dir' is the .git repository directory, which is always set.
"""
DAEMON_EXPORT_FILE = 'git-daemon-export-ok'
- __slots__ = ( "working_dir", "_working_tree_dir", "git_dir", "_bare", "git" )
+ __slots__ = ( "working_dir", "_working_tree_dir", "git_dir", "_bare", "git", "odb" )
# precompiled regex
re_whitespace = re.compile(r'\s+')
@@ -65,27 +68,22 @@ class Repo(object):
# represents the configuration level of a configuration file
config_level = ("system", "global", "repository")
- def __init__(self, path=None):
- """
- Create a new Repo instance
-
- ``path``
- is the path to either the root git directory or the bare git repo
+ def __init__(self, path=None, odbt = LooseObjectDB):
+ """ Create a new Repo instance
- Examples::
+ :param path: is the path to either the root git directory or the bare git repo::
repo = Repo("/Users/mtrier/Development/git-python")
repo = Repo("/Users/mtrier/Development/git-python.git")
repo = Repo("~/Development/git-python.git")
repo = Repo("$REPOSITORIES/Development/git-python.git")
-
- Raises
- InvalidGitRepositoryError or NoSuchPathError
-
- Returns
- ``git.Repo``
- """
-
+
+ :param odbt: Object DataBase type - a type which is constructed by providing
+ the directory containing the database objects, i.e. .git/objects. It will
+ be used to access all object data
+ :raise InvalidGitRepositoryError:
+ :raise NoSuchPathError:
+ :return: git.Repo """
epath = os.path.abspath(os.path.expandvars(os.path.expanduser(path or os.getcwd())))
if not os.path.exists(epath):
@@ -130,6 +128,7 @@ class Repo(object):
self.working_dir = self._working_tree_dir or self.git_dir
self.git = Git(self.working_dir)
+ self.odb = odbt(os.path.join(self.git_dir, 'objects'))
def __eq__(self, rhs):
if isinstance(rhs, Repo):
diff --git a/lib/git/utils.py b/lib/git/utils.py
index 360c77c9..60a7de48 100644
--- a/lib/git/utils.py
+++ b/lib/git/utils.py
@@ -27,6 +27,21 @@ def make_sha(source=''):
sha1 = sha.sha(source)
return sha1
def stream_copy(source, destination, chunk_size=512*1024):
    """Copy all data from the source stream into the destination stream in
    chunks of size chunk_size.

    Copying continues until the source returns an empty chunk. A read that
    returns fewer bytes than requested is not treated as end of stream, as
    pipes and raw streams may deliver partial reads before they are exhausted.
    The source must therefore signal its end with an empty read.

    :param source: object with a ``read(size)`` method
    :param destination: object with a ``write(data)`` method
    :param chunk_size: maximum amount of bytes requested per read
    :return: amount of bytes written"""
    total = 0
    while True:
        chunk = source.read(chunk_size)
        if not chunk:
            # empty read marks the end of the stream - nothing left to write
            break
        destination.write(chunk)
        total += len(chunk)
    # END reading source stream
    return total
+
+
def join_path(a, *p):
"""Join path tokens together similar to os.path.join, but always use
'/' instead of possibly '\' on windows."""