##// END OF EJS Templates
feat(archive-cache): added retry mechanism, and some code cleanups
super-admin -
r5426:47a7e5de default
parent child Browse files
Show More
@@ -1,1715 +1,1716 b''
1 1 # Copyright (C) 2011-2023 RhodeCode GmbH
2 2 #
3 3 # This program is free software: you can redistribute it and/or modify
4 4 # it under the terms of the GNU Affero General Public License, version 3
5 5 # (only), as published by the Free Software Foundation.
6 6 #
7 7 # This program is distributed in the hope that it will be useful,
8 8 # but WITHOUT ANY WARRANTY; without even the implied warranty of
9 9 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 10 # GNU General Public License for more details.
11 11 #
12 12 # You should have received a copy of the GNU Affero General Public License
13 13 # along with this program. If not, see <http://www.gnu.org/licenses/>.
14 14 #
15 15 # This program is dual-licensed. If you wish to learn more about the
16 16 # RhodeCode Enterprise Edition, including its added features, Support services,
17 17 # and proprietary license terms, please see https://rhodecode.com/licenses/
18 18
19 19 import itertools
20 20 import logging
21 21 import os
22 22 import collections
23 23 import urllib.request
24 24 import urllib.parse
25 25 import urllib.error
26 26 import pathlib
27 27 import time
28 28 import random
29 29
30 30 from pyramid.httpexceptions import HTTPNotFound, HTTPBadRequest, HTTPFound
31 31
32 32 from pyramid.renderers import render
33 33 from pyramid.response import Response
34 34
35 35 import rhodecode
36 36 from rhodecode.apps._base import RepoAppView
37 37
38 38
39 39 from rhodecode.lib import diffs, helpers as h, rc_cache
40 40 from rhodecode.lib import audit_logger
41 41 from rhodecode.lib.hash_utils import sha1_safe
42 42 from rhodecode.lib.rc_cache.archive_cache import (
43 get_archival_cache_store, get_archival_config, ArchiveCacheLock, archive_iterator)
43 get_archival_cache_store, get_archival_config, ArchiveCacheGenerationLock, archive_iterator)
44 44 from rhodecode.lib.str_utils import safe_bytes, convert_special_chars
45 45 from rhodecode.lib.view_utils import parse_path_ref
46 46 from rhodecode.lib.exceptions import NonRelativePathError
47 47 from rhodecode.lib.codeblocks import (
48 48 filenode_as_lines_tokens, filenode_as_annotated_lines_tokens)
49 49 from rhodecode.lib.utils2 import convert_line_endings, detect_mode
50 50 from rhodecode.lib.type_utils import str2bool
51 51 from rhodecode.lib.str_utils import safe_str, safe_int
52 52 from rhodecode.lib.auth import (
53 53 LoginRequired, HasRepoPermissionAnyDecorator, CSRFRequired)
54 54 from rhodecode.lib.vcs import path as vcspath
55 55 from rhodecode.lib.vcs.backends.base import EmptyCommit
56 56 from rhodecode.lib.vcs.conf import settings
57 57 from rhodecode.lib.vcs.nodes import FileNode
58 58 from rhodecode.lib.vcs.exceptions import (
59 59 RepositoryError, CommitDoesNotExistError, EmptyRepositoryError,
60 60 ImproperArchiveTypeError, VCSError, NodeAlreadyExistsError,
61 61 NodeDoesNotExistError, CommitError, NodeError)
62 62
63 63 from rhodecode.model.scm import ScmModel
64 64 from rhodecode.model.db import Repository
65 65
66 66 log = logging.getLogger(__name__)
67 67
68 68
def get_archive_name(db_repo_id, db_repo_name, commit_sha, ext, subrepos=False, path_sha='', with_hash=True):
    """
    Build the canonical archive file name for a repository snapshot.

    e.g. vcsserver-id-abcd-sub-1-abcfdef-archive-all.zip
         vcsserver-id-abcd-sub-0-abcfdef-COMMIT_SHA-PATH_SHA.zip
    """
    # backward-compatible base name: special chars converted, path sep replaced
    base_name = safe_str(convert_special_chars(db_repo_name).replace('/', '_'))

    repo_marker = sha1_safe(str(db_repo_id))[:4]
    if subrepos:
        sub_marker = 'sub-1'
    else:
        sub_marker = 'sub-0'

    if with_hash:
        commit_part = commit_sha
        path_part = path_sha or 'all'
    else:
        commit_part = 'archive'
        path_part = 'all'

    return f'{base_name}-id-{repo_marker}-{sub_marker}-{commit_part}-{path_part}{ext}'
82 82
83 83
def get_path_sha(at_path):
    """Return a short (8-char) sha1 digest of *at_path*, used in archive names."""
    digest = sha1_safe(at_path)
    return safe_str(digest[:8])
86 86
87 87
def _get_archive_spec(fname):
    """
    Split an archive file name into (commit_id, ext, fileformat, content_type).

    Matches *fname* against the known extensions in ``settings.ARCHIVE_SPECS``;
    the leftover prefix of the name is the commit id.

    :param fname: archive file name, e.g. ``deadbeef.tar.gz``
    :raises ValueError: when no known archive extension matches *fname*.
    """
    log.debug('Detecting archive spec for: `%s`', fname)

    fileformat = None
    ext = None
    content_type = None
    for a_type, content_type, extension in settings.ARCHIVE_SPECS:
        if fname.endswith(extension):
            fileformat = a_type
            log.debug('archive is of type: %s', fileformat)
            ext = extension
            break

    if not fileformat:
        # include the offending name so callers/logs can tell what failed
        raise ValueError(f'Unknown archive extension for file name: `{fname}`')

    # left over part of whole fname is the commit
    commit_id = fname[:-len(ext)]

    return commit_id, ext, fileformat, content_type
109 109
110 110
111 111 class RepoFilesView(RepoAppView):
112 112
113 113 @staticmethod
114 114 def adjust_file_path_for_svn(f_path, repo):
115 115 """
116 116 Computes the relative path of `f_path`.
117 117
118 118 This is mainly based on prefix matching of the recognized tags and
119 119 branches in the underlying repository.
120 120 """
121 121 tags_and_branches = itertools.chain(
122 122 repo.branches.keys(),
123 123 repo.tags.keys())
124 124 tags_and_branches = sorted(tags_and_branches, key=len, reverse=True)
125 125
126 126 for name in tags_and_branches:
127 127 if f_path.startswith(f'{name}/'):
128 128 f_path = vcspath.relpath(f_path, name)
129 129 break
130 130 return f_path
131 131
132 132 def load_default_context(self):
133 133 c = self._get_local_tmpl_context(include_app_defaults=True)
134 134 c.rhodecode_repo = self.rhodecode_vcs_repo
135 135 c.enable_downloads = self.db_repo.enable_downloads
136 136 return c
137 137
138 138 def _ensure_not_locked(self, commit_id='tip'):
139 139 _ = self.request.translate
140 140
141 141 repo = self.db_repo
142 142 if repo.enable_locking and repo.locked[0]:
143 143 h.flash(_('This repository has been locked by %s on %s')
144 144 % (h.person_by_id(repo.locked[0]),
145 145 h.format_date(h.time_to_datetime(repo.locked[1]))),
146 146 'warning')
147 147 files_url = h.route_path(
148 148 'repo_files:default_path',
149 149 repo_name=self.db_repo_name, commit_id=commit_id)
150 150 raise HTTPFound(files_url)
151 151
152 152 def forbid_non_head(self, is_head, f_path, commit_id='tip', json_mode=False):
153 153 _ = self.request.translate
154 154
155 155 if not is_head:
156 156 message = _('Cannot modify file. '
157 157 'Given commit `{}` is not head of a branch.').format(commit_id)
158 158 h.flash(message, category='warning')
159 159
160 160 if json_mode:
161 161 return message
162 162
163 163 files_url = h.route_path(
164 164 'repo_files', repo_name=self.db_repo_name, commit_id=commit_id,
165 165 f_path=f_path)
166 166 raise HTTPFound(files_url)
167 167
168 168 def check_branch_permission(self, branch_name, commit_id='tip', json_mode=False):
169 169 _ = self.request.translate
170 170
171 171 rule, branch_perm = self._rhodecode_user.get_rule_and_branch_permission(
172 172 self.db_repo_name, branch_name)
173 173 if branch_perm and branch_perm not in ['branch.push', 'branch.push_force']:
174 174 message = _('Branch `{}` changes forbidden by rule {}.').format(
175 175 h.escape(branch_name), h.escape(rule))
176 176 h.flash(message, 'warning')
177 177
178 178 if json_mode:
179 179 return message
180 180
181 181 files_url = h.route_path(
182 182 'repo_files:default_path', repo_name=self.db_repo_name, commit_id=commit_id)
183 183
184 184 raise HTTPFound(files_url)
185 185
186 186 def _get_commit_and_path(self):
187 187 default_commit_id = self.db_repo.landing_ref_name
188 188 default_f_path = '/'
189 189
190 190 commit_id = self.request.matchdict.get(
191 191 'commit_id', default_commit_id)
192 192 f_path = self._get_f_path(self.request.matchdict, default_f_path)
193 193 return commit_id, f_path
194 194
195 195 def _get_default_encoding(self, c):
196 196 enc_list = getattr(c, 'default_encodings', [])
197 197 return enc_list[0] if enc_list else 'UTF-8'
198 198
    def _get_commit_or_redirect(self, commit_id, redirect_after=True):
        """
        This is a safe way to get commit. If an error occurs it redirects to
        tip with proper message

        :param commit_id: id of commit to fetch
        :param redirect_after: toggle redirection
        """
        _ = self.request.translate

        try:
            return self.rhodecode_vcs_repo.get_commit(commit_id)
        except EmptyRepositoryError:
            if not redirect_after:
                return None

            # build "add"/"upload" action links only for users who can write
            add_new = upload_new = ""
            if h.HasRepoPermissionAny(
                    'repository.write', 'repository.admin')(self.db_repo_name):
                _url = h.route_path(
                    'repo_files_add_file',
                    repo_name=self.db_repo_name, commit_id=0, f_path='')
                add_new = h.link_to(
                    _('add a new file'), _url, class_="alert-link")

                _url_upld = h.route_path(
                    'repo_files_upload_file',
                    repo_name=self.db_repo_name, commit_id=0, f_path='')
                upload_new = h.link_to(
                    _('upload a new file'), _url_upld, class_="alert-link")

            # empty repo: point the user at the summary page with helper links
            h.flash(h.literal(
                _('There are no files yet. Click here to %s or %s.') % (add_new, upload_new)), category='warning')
            raise HTTPFound(
                h.route_path('repo_summary', repo_name=self.db_repo_name))

        except (CommitDoesNotExistError, LookupError) as e:
            msg = _('No such commit exists for this repository. Commit: {}').format(commit_id)
            h.flash(msg, category='error')
            raise HTTPNotFound()
        except RepositoryError as e:
            # any other repo-level failure: surface the error text and 404
            h.flash(h.escape(safe_str(e)), category='error')
            raise HTTPNotFound()
242 242
243 243 def _get_filenode_or_redirect(self, commit_obj, path, pre_load=None):
244 244 """
245 245 Returns file_node, if error occurs or given path is directory,
246 246 it'll redirect to top level path
247 247 """
248 248 _ = self.request.translate
249 249
250 250 try:
251 251 file_node = commit_obj.get_node(path, pre_load=pre_load)
252 252 if file_node.is_dir():
253 253 raise RepositoryError('The given path is a directory')
254 254 except CommitDoesNotExistError:
255 255 log.exception('No such commit exists for this repository')
256 256 h.flash(_('No such commit exists for this repository'), category='error')
257 257 raise HTTPNotFound()
258 258 except RepositoryError as e:
259 259 log.warning('Repository error while fetching filenode `%s`. Err:%s', path, e)
260 260 h.flash(h.escape(safe_str(e)), category='error')
261 261 raise HTTPNotFound()
262 262
263 263 return file_node
264 264
    def _is_valid_head(self, commit_id, repo, landing_ref):
        """
        Resolve *commit_id* against *repo* and report whether it points at a
        branch head.

        *commit_id* may be a branch name or a full sha; both are matched
        against the repo's branch heads.

        :return: tuple of (branch_name, sha_commit_id, is_head)
        """
        branch_name = sha_commit_id = ''
        is_head = False
        log.debug('Checking if commit_id `%s` is a head for %s.', commit_id, repo)

        for _branch_name, branch_commit_id in repo.branches.items():
            # simple case we pass in branch name, it's a HEAD
            if commit_id == _branch_name:
                is_head = True
                branch_name = _branch_name
                sha_commit_id = branch_commit_id
                break
            # case when we pass in full sha commit_id, which is a head
            elif commit_id == branch_commit_id:
                is_head = True
                branch_name = _branch_name
                sha_commit_id = branch_commit_id
                break

        if h.is_svn(repo) and not repo.is_empty():
            # Note: Subversion only has one head.
            if commit_id == repo.get_commit(commit_idx=-1).raw_id:
                is_head = True
            # svn returns early: branch_name/sha_commit_id keep whatever the
            # loop above found (possibly empty strings)
            return branch_name, sha_commit_id, is_head

        # checked branches, means we only need to try to get the branch/commit_sha
        if repo.is_empty():
            # an empty repo is always "at head" of its landing ref
            is_head = True
            branch_name = landing_ref
            sha_commit_id = EmptyCommit().raw_id
        else:
            commit = repo.get_commit(commit_id=commit_id)
            if commit:
                branch_name = commit.branch
                sha_commit_id = commit.raw_id

        return branch_name, sha_commit_id, is_head
302 302
303 303 def _get_tree_at_commit(self, c, commit_id, f_path, full_load=False, at_rev=None):
304 304
305 305 repo_id = self.db_repo.repo_id
306 306 force_recache = self.get_recache_flag()
307 307
308 308 cache_seconds = safe_int(
309 309 rhodecode.CONFIG.get('rc_cache.cache_repo.expiration_time'))
310 310 cache_on = not force_recache and cache_seconds > 0
311 311 log.debug(
312 312 'Computing FILE TREE for repo_id %s commit_id `%s` and path `%s`'
313 313 'with caching: %s[TTL: %ss]' % (
314 314 repo_id, commit_id, f_path, cache_on, cache_seconds or 0))
315 315
316 316 cache_namespace_uid = f'repo.{rc_cache.FILE_TREE_CACHE_VER}.{repo_id}'
317 317 region = rc_cache.get_or_create_region('cache_repo', cache_namespace_uid)
318 318
319 319 @region.conditional_cache_on_arguments(namespace=cache_namespace_uid, condition=cache_on)
320 320 def compute_file_tree(_name_hash, _repo_id, _commit_id, _f_path, _full_load, _at_rev):
321 321 log.debug('Generating cached file tree at for repo_id: %s, %s, %s',
322 322 _repo_id, _commit_id, _f_path)
323 323
324 324 c.full_load = _full_load
325 325 return render(
326 326 'rhodecode:templates/files/files_browser_tree.mako',
327 327 self._get_template_context(c), self.request, _at_rev)
328 328
329 329 return compute_file_tree(
330 330 self.db_repo.repo_name_hash, self.db_repo.repo_id, commit_id, f_path, full_load, at_rev)
331 331
332 332 def create_pure_path(self, *parts):
333 333 # Split paths and sanitize them, removing any ../ etc
334 334 sanitized_path = [
335 335 x for x in pathlib.PurePath(*parts).parts
336 336 if x not in ['.', '..']]
337 337
338 338 pure_path = pathlib.PurePath(*sanitized_path)
339 339 return pure_path
340 340
341 341 def _is_lf_enabled(self, target_repo):
342 342 lf_enabled = False
343 343
344 344 lf_key_for_vcs_map = {
345 345 'hg': 'extensions_largefiles',
346 346 'git': 'vcs_git_lfs_enabled'
347 347 }
348 348
349 349 lf_key_for_vcs = lf_key_for_vcs_map.get(target_repo.repo_type)
350 350
351 351 if lf_key_for_vcs:
352 352 lf_enabled = self._get_repo_setting(target_repo, lf_key_for_vcs)
353 353
354 354 return lf_enabled
355 355
    @LoginRequired()
    @HasRepoPermissionAnyDecorator(
        'repository.read', 'repository.write', 'repository.admin')
    def repo_archivefile(self):
        """
        Serve a repository archive (zip/tar/...) for a given commit and path.

        Archives are generated once, stored in the archive cache, and served
        from there on subsequent requests. When another request currently
        holds the generation lock, responds with a 307 + Retry-After so the
        client retries the same URL after a short randomized delay.
        """
        # archive cache config
        from rhodecode import CONFIG
        _ = self.request.translate
        self.load_default_context()
        default_at_path = '/'
        fname = self.request.matchdict['fname']
        subrepos = self.request.GET.get('subrepos') == 'true'
        with_hash = str2bool(self.request.GET.get('with_hash', '1'))
        at_path = self.request.GET.get('at_path') or default_at_path

        if not self.db_repo.enable_downloads:
            return Response(_('Downloads disabled'))

        try:
            commit_id, ext, fileformat, content_type = \
                _get_archive_spec(fname)
        except ValueError:
            return Response(_('Unknown archive type for: `{}`').format(
                h.escape(fname)))

        try:
            commit = self.rhodecode_vcs_repo.get_commit(commit_id)
        except CommitDoesNotExistError:
            return Response(_('Unknown commit_id {}').format(
                h.escape(commit_id)))
        except EmptyRepositoryError:
            return Response(_('Empty repository'))

        # we used a ref, or a shorter version, lets redirect client to use explicit hash
        if commit_id != commit.raw_id:
            fname=f'{commit.raw_id}{ext}'
            raise HTTPFound(self.request.current_route_path(fname=fname))

        try:
            at_path = commit.get_node(at_path).path or default_at_path
        except Exception:
            return Response(_('No node at path {} for this repository').format(h.escape(at_path)))

        path_sha = get_path_sha(at_path)

        # used for cache etc, consistent unique archive name
        archive_name_key = get_archive_name(
            self.db_repo.repo_id, self.db_repo_name, commit_sha=commit.short_id, ext=ext, subrepos=subrepos,
            path_sha=path_sha, with_hash=True)

        if not with_hash:
            path_sha = ''

        # what end client gets served
        response_archive_name = get_archive_name(
            self.db_repo.repo_id, self.db_repo_name, commit_sha=commit.short_id, ext=ext, subrepos=subrepos,
            path_sha=path_sha, with_hash=with_hash)

        # remove extension from our archive directory name
        archive_dir_name = response_archive_name[:-len(ext)]

        archive_cache_disable = self.request.GET.get('no_cache')

        d_cache = get_archival_cache_store(config=CONFIG)

        # NOTE: we get the config to pass to a call to lazy-init the SAME type of cache on vcsserver
        d_cache_conf = get_archival_config(config=CONFIG)

        # This is also a cache key, and lock key
        reentrant_lock_key = archive_name_key + '.lock'

        use_cached_archive = False
        if not archive_cache_disable and archive_name_key in d_cache:
            reader, metadata = d_cache.fetch(archive_name_key)

            use_cached_archive = True
            log.debug('Found cached archive as key=%s tag=%s, serving archive from cache reader=%s',
                      archive_name_key, metadata, reader.name)
        else:
            reader = None
            log.debug('Archive with key=%s is not yet cached, creating one now...', archive_name_key)

        if not reader:
            # generate new archive, as previous was not found in the cache
            try:
                with d_cache.get_lock(reentrant_lock_key):
                    try:
                        commit.archive_repo(archive_name_key, archive_dir_name=archive_dir_name,
                                            kind=fileformat, subrepos=subrepos,
                                            archive_at_path=at_path, cache_config=d_cache_conf)
                    except ImproperArchiveTypeError:
                        return _('Unknown archive type')

            except ArchiveCacheGenerationLock:
                # another request is generating this archive: tell the client
                # to retry the same URL after a short randomized backoff
                retry_after = round(random.uniform(0.3, 3.0), 1)
                time.sleep(retry_after)

                location = self.request.url
                response = Response(
                    f"archive {archive_name_key} generation in progress, Retry-After={retry_after}, Location={location}"
                )
                response.headers["Retry-After"] = str(retry_after)
                response.status_code = 307  # temporary redirect

                response.location = location
                return response

            reader, metadata = d_cache.fetch(archive_name_key)

        response = Response(app_iter=archive_iterator(reader))
        response.content_disposition = f'attachment; filename={response_archive_name}'
        response.content_type = str(content_type)

        try:
            return response
        finally:
            # store download action
            audit_logger.store_web(
                'repo.archive.download', action_data={
                    'user_agent': self.request.user_agent,
                    'archive_name': archive_name_key,
                    'archive_spec': fname,
                    'archive_cached': use_cached_archive},
                user=self._rhodecode_user,
                repo=self.db_repo,
                commit=True
            )
481 482
    def _get_file_node(self, commit_id, f_path):
        """
        Return a FileNode for *f_path* at *commit_id*.

        Falls back to an empty FileNode attached to an EmptyCommit when the
        commit id is a null/placeholder value, or when the path does not
        exist in the commit — lets the diff views diff against "nothing".

        :raises NodeError: when the path resolves to a directory.
        """
        # null-ish commit ids represent the "missing" side of a diff
        if commit_id not in ['', None, 'None', '0' * 12, '0' * 40]:
            commit = self.rhodecode_vcs_repo.get_commit(commit_id=commit_id)
            try:
                node = commit.get_node(f_path)
                if node.is_dir():
                    raise NodeError(f'{node} path is a {type(node)} not a file')
            except NodeDoesNotExistError:
                # path absent at this commit: synthesize an empty file node
                # carrying the real commit's metadata
                commit = EmptyCommit(
                    commit_id=commit_id,
                    idx=commit.idx,
                    repo=commit.repository,
                    alias=commit.repository.alias,
                    message=commit.message,
                    author=commit.author,
                    date=commit.date)
                node = FileNode(safe_bytes(f_path), b'', commit=commit)
        else:
            commit = EmptyCommit(
                repo=self.rhodecode_vcs_repo,
                alias=self.rhodecode_vcs_repo.alias)
            node = FileNode(safe_bytes(f_path), b'', commit=commit)
        return node
505 506
    @LoginRequired()
    @HasRepoPermissionAnyDecorator(
        'repository.read', 'repository.write', 'repository.admin')
    def repo_files_diff(self):
        """
        Diff one file between two revisions given as ?diff1= and ?diff2=.

        ?diff=download serves the patch as an attachment, ?diff=raw serves it
        inline as text/plain; any other mode redirects to the compare view.
        """
        c = self.load_default_context()
        f_path = self._get_f_path(self.request.matchdict)
        diff1 = self.request.GET.get('diff1', '')
        diff2 = self.request.GET.get('diff2', '')

        path1, diff1 = parse_path_ref(diff1, default_path=f_path)

        ignore_whitespace = str2bool(self.request.GET.get('ignorews'))
        line_context = self.request.GET.get('context', 3)

        if not any((diff1, diff2)):
            h.flash(
                'Need query parameter "diff1" or "diff2" to generate a diff.',
                category='error')
            raise HTTPBadRequest()

        c.action = self.request.GET.get('diff')
        if c.action not in ['download', 'raw']:
            # non-download modes are handled by the dedicated compare view
            compare_url = h.route_path(
                'repo_compare',
                repo_name=self.db_repo_name,
                source_ref_type='rev',
                source_ref=diff1,
                target_repo=self.db_repo_name,
                target_ref_type='rev',
                target_ref=diff2,
                _query=dict(f_path=f_path))
            # redirect to new view if we render diff
            raise HTTPFound(compare_url)

        try:
            node1 = self._get_file_node(diff1, path1)
            node2 = self._get_file_node(diff2, f_path)
        except (RepositoryError, NodeError):
            log.exception("Exception while trying to get node from repository")
            raise HTTPFound(
                h.route_path('repo_files', repo_name=self.db_repo_name,
                             commit_id='tip', f_path=f_path))

        # both sides resolved to the "missing" placeholder: nothing to diff
        if all(isinstance(node.commit, EmptyCommit)
               for node in (node1, node2)):
            raise HTTPNotFound()

        c.commit_1 = node1.commit
        c.commit_2 = node2.commit

        if c.action == 'download':
            _diff = diffs.get_gitdiff(node1, node2,
                                      ignore_whitespace=ignore_whitespace,
                                      context=line_context)
            # NOTE: this was using diff_format='gitdiff'
            diff = diffs.DiffProcessor(_diff, diff_format='newdiff')

            response = Response(self.path_filter.get_raw_patch(diff))
            response.content_type = 'text/plain'
            response.content_disposition = (
                f'attachment; filename={f_path}_{diff1}_vs_{diff2}.diff'
            )
            charset = self._get_default_encoding(c)
            if charset:
                response.charset = charset
            return response

        elif c.action == 'raw':
            _diff = diffs.get_gitdiff(node1, node2,
                                      ignore_whitespace=ignore_whitespace,
                                      context=line_context)
            # NOTE: this was using diff_format='gitdiff'
            diff = diffs.DiffProcessor(_diff, diff_format='newdiff')

            response = Response(self.path_filter.get_raw_patch(diff))
            response.content_type = 'text/plain'
            charset = self._get_default_encoding(c)
            if charset:
                response.charset = charset
            return response

        # in case we ever end up here
        raise HTTPNotFound()
589 590
590 591 @LoginRequired()
591 592 @HasRepoPermissionAnyDecorator(
592 593 'repository.read', 'repository.write', 'repository.admin')
593 594 def repo_files_diff_2way_redirect(self):
594 595 """
595 596 Kept only to make OLD links work
596 597 """
597 598 f_path = self._get_f_path_unchecked(self.request.matchdict)
598 599 diff1 = self.request.GET.get('diff1', '')
599 600 diff2 = self.request.GET.get('diff2', '')
600 601
601 602 if not any((diff1, diff2)):
602 603 h.flash(
603 604 'Need query parameter "diff1" or "diff2" to generate a diff.',
604 605 category='error')
605 606 raise HTTPBadRequest()
606 607
607 608 compare_url = h.route_path(
608 609 'repo_compare',
609 610 repo_name=self.db_repo_name,
610 611 source_ref_type='rev',
611 612 source_ref=diff1,
612 613 target_ref_type='rev',
613 614 target_ref=diff2,
614 615 _query=dict(f_path=f_path, diffmode='sideside',
615 616 target_repo=self.db_repo_name,))
616 617 raise HTTPFound(compare_url)
617 618
618 619 @LoginRequired()
619 620 def repo_files_default_commit_redirect(self):
620 621 """
621 622 Special page that redirects to the landing page of files based on the default
622 623 commit for repository
623 624 """
624 625 c = self.load_default_context()
625 626 ref_name = c.rhodecode_db_repo.landing_ref_name
626 627 landing_url = h.repo_files_by_ref_url(
627 628 c.rhodecode_db_repo.repo_name,
628 629 c.rhodecode_db_repo.repo_type,
629 630 f_path='',
630 631 ref_name=ref_name,
631 632 commit_id='tip',
632 633 query=dict(at=ref_name)
633 634 )
634 635
635 636 raise HTTPFound(landing_url)
636 637
    @LoginRequired()
    @HasRepoPermissionAnyDecorator(
        'repository.read', 'repository.write', 'repository.admin')
    def repo_files(self):
        """
        Main files view: renders either a single file (source / annotate /
        rendered markup) or a directory tree at the given commit and path.
        """
        c = self.load_default_context()

        view_name = getattr(self.request.matched_route, 'name', None)

        c.annotate = view_name == 'repo_files:annotated'
        # default is false, but .rst/.md files later are auto rendered, we can
        # overwrite auto rendering by setting this GET flag
        c.renderer = view_name == 'repo_files:rendered' or not self.request.GET.get('no-render', False)

        commit_id, f_path = self._get_commit_and_path()

        c.commit = self._get_commit_or_redirect(commit_id)
        c.branch = self.request.GET.get('branch', None)
        c.f_path = f_path
        at_rev = self.request.GET.get('at')

        # files or dirs
        try:
            c.file = c.commit.get_node(f_path, pre_load=['is_binary', 'size', 'data'])

            c.file_author = True
            c.file_tree = ''

            # prev link
            try:
                prev_commit = c.commit.prev(c.branch)
                c.prev_commit = prev_commit
                c.url_prev = h.route_path(
                    'repo_files', repo_name=self.db_repo_name,
                    commit_id=prev_commit.raw_id, f_path=f_path)
                if c.branch:
                    c.url_prev += '?branch=%s' % c.branch
            except (CommitDoesNotExistError, VCSError):
                # no previous commit on this branch: dead link
                c.url_prev = '#'
                c.prev_commit = EmptyCommit()

            # next link
            try:
                next_commit = c.commit.next(c.branch)
                c.next_commit = next_commit
                c.url_next = h.route_path(
                    'repo_files', repo_name=self.db_repo_name,
                    commit_id=next_commit.raw_id, f_path=f_path)
                if c.branch:
                    c.url_next += '?branch=%s' % c.branch
            except (CommitDoesNotExistError, VCSError):
                # no next commit on this branch: dead link
                c.url_next = '#'
                c.next_commit = EmptyCommit()

            # load file content
            if c.file.is_file():

                c.lf_node = {}

                has_lf_enabled = self._is_lf_enabled(self.db_repo)
                if has_lf_enabled:
                    c.lf_node = c.file.get_largefile_node()

                c.file_source_page = 'true'
                c.file_last_commit = c.file.last_commit

                c.file_size_too_big = c.file.size > c.visual.cut_off_limit_file

                # tokenize content only for reasonably-sized, non-binary files
                if not (c.file_size_too_big or c.file.is_binary):
                    if c.annotate:  # annotation has precedence over renderer
                        c.annotated_lines = filenode_as_annotated_lines_tokens(
                            c.file
                        )
                    else:
                        c.renderer = (
                            c.renderer and h.renderer_from_filename(c.file.path)
                        )
                        if not c.renderer:
                            c.lines = filenode_as_lines_tokens(c.file)

                _branch_name, _sha_commit_id, is_head = \
                    self._is_valid_head(commit_id, self.rhodecode_vcs_repo,
                                        landing_ref=self.db_repo.landing_ref_name)
                c.on_branch_head = is_head

                # branch names containing '/' are not usable in file URLs
                branch = c.commit.branch if (
                    c.commit.branch and '/' not in c.commit.branch) else None
                c.branch_or_raw_id = branch or c.commit.raw_id
                c.branch_name = c.commit.branch or h.short_id(c.commit.raw_id)

                author = c.file_last_commit.author
                c.authors = [[
                    h.email(author),
                    h.person(author, 'username_or_name_or_email'),
                    1
                ]]

            else:  # load tree content at path
                c.file_source_page = 'false'
                c.authors = []
                # this loads a simple tree without metadata to speed things up
                # later via ajax we call repo_nodetree_full and fetch whole
                c.file_tree = self._get_tree_at_commit(c, c.commit.raw_id, f_path, at_rev=at_rev)

            c.readme_data, c.readme_file = \
                self._get_readme_data(self.db_repo, c.visual.default_renderer,
                                      c.commit.raw_id, f_path)

        except RepositoryError as e:
            h.flash(h.escape(safe_str(e)), category='error')
            raise HTTPNotFound()

        # PJAX requests get only the inner fragment, not the full page
        if self.request.environ.get('HTTP_X_PJAX'):
            html = render('rhodecode:templates/files/files_pjax.mako',
                          self._get_template_context(c), self.request)
        else:
            html = render('rhodecode:templates/files/files.mako',
                          self._get_template_context(c), self.request)
        return Response(html)
755 756
    # NOTE(review): unlike the sibling views this one lacks @LoginRequired() —
    # confirm whether anonymous access here is intentional
    @HasRepoPermissionAnyDecorator(
        'repository.read', 'repository.write', 'repository.admin')
    def repo_files_annotated_previous(self):
        """
        Redirect to the annotated view of the previous commit that touched
        f_path, preserving the requested line anchor.
        """
        self.load_default_context()

        commit_id, f_path = self._get_commit_and_path()
        commit = self._get_commit_or_redirect(commit_id)
        prev_commit_id = commit.raw_id
        line_anchor = self.request.GET.get('line_anchor')
        is_file = False
        try:
            _file = commit.get_node(f_path)
            is_file = _file.is_file()
        except (NodeDoesNotExistError, CommitDoesNotExistError, VCSError):
            # path missing or commit gone: keep current commit id as target
            pass

        if is_file:
            history = commit.get_path_history(f_path)
            # second history entry (if any) is the previous commit for this path
            prev_commit_id = history[1].raw_id \
                if len(history) > 1 else prev_commit_id
        prev_url = h.route_path(
            'repo_files:annotated', repo_name=self.db_repo_name,
            commit_id=prev_commit_id, f_path=f_path,
            _anchor=f'L{line_anchor}')

        raise HTTPFound(prev_url)
782 783
783 784 @LoginRequired()
784 785 @HasRepoPermissionAnyDecorator(
785 786 'repository.read', 'repository.write', 'repository.admin')
786 787 def repo_nodetree_full(self):
787 788 """
788 789 Returns rendered html of file tree that contains commit date,
789 790 author, commit_id for the specified combination of
790 791 repo, commit_id and file path
791 792 """
792 793 c = self.load_default_context()
793 794
794 795 commit_id, f_path = self._get_commit_and_path()
795 796 commit = self._get_commit_or_redirect(commit_id)
796 797 try:
797 798 dir_node = commit.get_node(f_path)
798 799 except RepositoryError as e:
799 800 return Response(f'error: {h.escape(safe_str(e))}')
800 801
801 802 if dir_node.is_file():
802 803 return Response('')
803 804
804 805 c.file = dir_node
805 806 c.commit = commit
806 807 at_rev = self.request.GET.get('at')
807 808
808 809 html = self._get_tree_at_commit(
809 810 c, commit.raw_id, dir_node.path, full_load=True, at_rev=at_rev)
810 811
811 812 return Response(html)
812 813
813 814 def _get_attachement_headers(self, f_path):
814 815 f_name = safe_str(f_path.split(Repository.NAME_SEP)[-1])
815 816 safe_path = f_name.replace('"', '\\"')
816 817 encoded_path = urllib.parse.quote(f_name)
817 818
818 819 headers = "attachment; " \
819 820 "filename=\"{}\"; " \
820 821 "filename*=UTF-8\'\'{}".format(safe_path, encoded_path)
821 822
822 823 return safe_bytes(headers).decode('latin-1', errors='replace')
823 824
    @LoginRequired()
    @HasRepoPermissionAnyDecorator(
        'repository.read', 'repository.write', 'repository.admin')
    def repo_file_raw(self):
        """
        Action for show as raw, some mimetypes are "rendered",
        those include images, icons.

        Streams the file node bytes back with a content-type and
        content-disposition chosen so untrusted content is never served
        in a way the browser would render as HTML.
        """
        c = self.load_default_context()

        commit_id, f_path = self._get_commit_and_path()
        commit = self._get_commit_or_redirect(commit_id)
        file_node = self._get_filenode_or_redirect(commit, f_path)

        raw_mimetype_mapping = {
            # map original mimetype to a mimetype used for "show as raw"
            # you can also provide a content-disposition to override the
            # default "attachment" disposition.
            # orig_type: (new_type, new_dispo)

            # show images inline:
            # Do not re-add SVG: it is unsafe and permits XSS attacks. One can
            # for example render an SVG with javascript inside or even render
            # HTML.
            'image/x-icon': ('image/x-icon', 'inline'),
            'image/png': ('image/png', 'inline'),
            'image/gif': ('image/gif', 'inline'),
            'image/jpeg': ('image/jpeg', 'inline'),
            'application/pdf': ('application/pdf', 'inline'),
        }

        mimetype = file_node.mimetype
        try:
            mimetype, disposition = raw_mimetype_mapping[mimetype]
        except KeyError:
            # we don't know anything special about this, handle it safely
            if file_node.is_binary:
                # do same as download raw for binary files
                mimetype, disposition = 'application/octet-stream', 'attachment'
            else:
                # do not just use the original mimetype, but force text/plain,
                # otherwise it would serve text/html and that might be unsafe.
                # Note: underlying vcs library fakes text/plain mimetype if the
                # mimetype can not be determined and it thinks it is not
                # binary.This might lead to erroneous text display in some
                # cases, but helps in other cases, like with text files
                # without extension.
                mimetype, disposition = 'text/plain', 'inline'

        if disposition == 'attachment':
            # expand the bare keyword into a full header with the file name
            disposition = self._get_attachement_headers(f_path)

        stream_content = file_node.stream_bytes()

        response = Response(app_iter=stream_content)
        response.content_disposition = disposition
        response.content_type = mimetype

        charset = self._get_default_encoding(c)
        if charset:
            response.charset = charset

        return response
887 888
888 889 @LoginRequired()
889 890 @HasRepoPermissionAnyDecorator(
890 891 'repository.read', 'repository.write', 'repository.admin')
891 892 def repo_file_download(self):
892 893 c = self.load_default_context()
893 894
894 895 commit_id, f_path = self._get_commit_and_path()
895 896 commit = self._get_commit_or_redirect(commit_id)
896 897 file_node = self._get_filenode_or_redirect(commit, f_path)
897 898
898 899 if self.request.GET.get('lf'):
899 900 # only if lf get flag is passed, we download this file
900 901 # as LFS/Largefile
901 902 lf_node = file_node.get_largefile_node()
902 903 if lf_node:
903 904 # overwrite our pointer with the REAL large-file
904 905 file_node = lf_node
905 906
906 907 disposition = self._get_attachement_headers(f_path)
907 908
908 909 stream_content = file_node.stream_bytes()
909 910
910 911 response = Response(app_iter=stream_content)
911 912 response.content_disposition = disposition
912 913 response.content_type = file_node.mimetype
913 914
914 915 charset = self._get_default_encoding(c)
915 916 if charset:
916 917 response.charset = charset
917 918
918 919 return response
919 920
    def _get_nodelist_at_commit(self, repo_name, repo_id, commit_id, f_path):
        """Return quick-filter nodes (dirs + files) for a commit and path.

        The computation is cached in the `cache_repo` region when the
        configured TTL is positive; after the (shared) cache lookup the
        result is filtered down to paths the current user may access.
        """
        cache_seconds = safe_int(
            rhodecode.CONFIG.get('rc_cache.cache_repo.expiration_time'))
        cache_on = cache_seconds > 0
        log.debug(
            'Computing FILE SEARCH for repo_id %s commit_id `%s` and path `%s`'
            'with caching: %s[TTL: %ss]' % (
                repo_id, commit_id, f_path, cache_on, cache_seconds or 0))

        cache_namespace_uid = f'repo.{repo_id}'
        region = rc_cache.get_or_create_region('cache_repo', cache_namespace_uid)

        @region.conditional_cache_on_arguments(namespace=cache_namespace_uid, condition=cache_on)
        def compute_file_search(_name_hash, _repo_id, _commit_id, _f_path):
            # NOTE: _name_hash only participates in the cache key; the
            # actual lookup uses the outer `repo_name`
            log.debug('Generating cached nodelist for repo_id:%s, %s, %s',
                      _repo_id, commit_id, f_path)
            try:
                _d, _f = ScmModel().get_quick_filter_nodes(repo_name, _commit_id, _f_path)
            except (RepositoryError, CommitDoesNotExistError, Exception) as e:
                log.exception(safe_str(e))
                h.flash(h.escape(safe_str(e)), category='error')
                raise HTTPFound(h.route_path(
                    'repo_files', repo_name=self.db_repo_name,
                    commit_id='tip', f_path='/'))

            return _d + _f

        result = compute_file_search(self.db_repo.repo_name_hash, self.db_repo.repo_id,
                                     commit_id, f_path)
        # lazily drop entries the user has no path access to
        return filter(lambda n: self.path_filter.path_access_allowed(n['name']), result)
951 952
952 953 @LoginRequired()
953 954 @HasRepoPermissionAnyDecorator(
954 955 'repository.read', 'repository.write', 'repository.admin')
955 956 def repo_nodelist(self):
956 957 self.load_default_context()
957 958
958 959 commit_id, f_path = self._get_commit_and_path()
959 960 commit = self._get_commit_or_redirect(commit_id)
960 961
961 962 metadata = self._get_nodelist_at_commit(
962 963 self.db_repo_name, self.db_repo.repo_id, commit.raw_id, f_path)
963 964 return {'nodes': [x for x in metadata]}
964 965
965 966 def _create_references(self, branches_or_tags, symbolic_reference, f_path, ref_type):
966 967 items = []
967 968 for name, commit_id in branches_or_tags.items():
968 969 sym_ref = symbolic_reference(commit_id, name, f_path, ref_type)
969 970 items.append((sym_ref, name, ref_type))
970 971 return items
971 972
972 973 def _symbolic_reference(self, commit_id, name, f_path, ref_type):
973 974 return commit_id
974 975
975 976 def _symbolic_reference_svn(self, commit_id, name, f_path, ref_type):
976 977 return commit_id
977 978
978 979 # NOTE(dan): old code we used in "diff" mode compare
979 980 new_f_path = vcspath.join(name, f_path)
980 981 return f'{new_f_path}@{commit_id}'
981 982
    def _get_node_history(self, commit_obj, f_path, commits=None):
        """
        get commit history for given node

        :param commit_obj: commit to calculate history
        :param f_path: path for node to calculate history for
        :param commits: if passed don't calculate history and take
            commits defined in this list
        :returns: tuple ``(history, commits)`` where ``history`` is a list
            of ``(items, label)`` groups (changesets, branches, tags)
        """
        _ = self.request.translate

        # calculate history based on tip
        tip = self.rhodecode_vcs_repo.get_commit()
        if commits is None:
            pre_load = ["author", "branch"]
            try:
                commits = tip.get_path_history(f_path, pre_load=pre_load)
            except (NodeDoesNotExistError, CommitError):
                # this node is not present at tip!
                commits = commit_obj.get_path_history(f_path, pre_load=pre_load)

        history = []
        commits_group = ([], _("Changesets"))
        for commit in commits:
            branch = ' (%s)' % commit.branch if commit.branch else ''
            n_desc = f'r{commit.idx}:{commit.short_id}{branch}'
            commits_group[0].append((commit.raw_id, n_desc, 'sha'))
        history.append(commits_group)

        symbolic_reference = self._symbolic_reference

        if self.rhodecode_vcs_repo.alias == 'svn':
            # svn encodes branches/tags in the path, so the file path and the
            # reference formatter may need adjusting
            adjusted_f_path = RepoFilesView.adjust_file_path_for_svn(
                f_path, self.rhodecode_vcs_repo)
            if adjusted_f_path != f_path:
                log.debug(
                    'Recognized svn tag or branch in file "%s", using svn '
                    'specific symbolic references', f_path)
                f_path = adjusted_f_path
                symbolic_reference = self._symbolic_reference_svn

        branches = self._create_references(
            self.rhodecode_vcs_repo.branches, symbolic_reference, f_path, 'branch')
        branches_group = (branches, _("Branches"))

        tags = self._create_references(
            self.rhodecode_vcs_repo.tags, symbolic_reference, f_path, 'tag')
        tags_group = (tags, _("Tags"))

        history.append(branches_group)
        history.append(tags_group)

        return history, commits
1035 1036
1036 1037 @LoginRequired()
1037 1038 @HasRepoPermissionAnyDecorator(
1038 1039 'repository.read', 'repository.write', 'repository.admin')
1039 1040 def repo_file_history(self):
1040 1041 self.load_default_context()
1041 1042
1042 1043 commit_id, f_path = self._get_commit_and_path()
1043 1044 commit = self._get_commit_or_redirect(commit_id)
1044 1045 file_node = self._get_filenode_or_redirect(commit, f_path)
1045 1046
1046 1047 if file_node.is_file():
1047 1048 file_history, _hist = self._get_node_history(commit, f_path)
1048 1049
1049 1050 res = []
1050 1051 for section_items, section in file_history:
1051 1052 items = []
1052 1053 for obj_id, obj_text, obj_type in section_items:
1053 1054 at_rev = ''
1054 1055 if obj_type in ['branch', 'bookmark', 'tag']:
1055 1056 at_rev = obj_text
1056 1057 entry = {
1057 1058 'id': obj_id,
1058 1059 'text': obj_text,
1059 1060 'type': obj_type,
1060 1061 'at_rev': at_rev
1061 1062 }
1062 1063
1063 1064 items.append(entry)
1064 1065
1065 1066 res.append({
1066 1067 'text': section,
1067 1068 'children': items
1068 1069 })
1069 1070
1070 1071 data = {
1071 1072 'more': False,
1072 1073 'results': res
1073 1074 }
1074 1075 return data
1075 1076
1076 1077 log.warning('Cannot fetch history for directory')
1077 1078 raise HTTPBadRequest()
1078 1079
    @LoginRequired()
    @HasRepoPermissionAnyDecorator(
        'repository.read', 'repository.write', 'repository.admin')
    def repo_file_authors(self):
        """Render the unique authors of a file together with their commit counts."""
        c = self.load_default_context()

        commit_id, f_path = self._get_commit_and_path()
        commit = self._get_commit_or_redirect(commit_id)
        file_node = self._get_filenode_or_redirect(commit, f_path)

        # authors can only be computed for files, not directories
        if not file_node.is_file():
            raise HTTPBadRequest()

        c.file_last_commit = file_node.last_commit
        if self.request.GET.get('annotate') == '1':
            # use _hist from annotation if annotation mode is on
            commit_ids = {x[1] for x in file_node.annotate}
            _hist = (
                self.rhodecode_vcs_repo.get_commit(commit_id)
                for commit_id in commit_ids)
        else:
            _f_history, _hist = self._get_node_history(commit, f_path)
        c.file_author = False

        # aggregate commits per author, preserving first-seen order
        unique = collections.OrderedDict()
        for commit in _hist:
            author = commit.author
            if author not in unique:
                unique[commit.author] = [
                    h.email(author),
                    h.person(author, 'username_or_name_or_email'),
                    1 # counter
                ]

            else:
                # increase counter
                unique[commit.author][2] += 1

        c.authors = [val for val in unique.values()]

        return self._get_template_context(c)
1120 1121
    @LoginRequired()
    @HasRepoPermissionAnyDecorator('repository.write', 'repository.admin')
    def repo_files_check_head(self):
        """JSON endpoint: report whether the target is a branch head and
        whether a create/upload destination path already exists.

        Used by the create/upload UI to warn before committing.
        """
        self.load_default_context()

        commit_id, f_path = self._get_commit_and_path()
        _branch_name, _sha_commit_id, is_head = \
            self._is_valid_head(commit_id, self.rhodecode_vcs_repo,
                                landing_ref=self.db_repo.landing_ref_name)

        new_path = self.request.POST.get('path')
        operation = self.request.POST.get('operation')
        path_exist = ''

        if new_path and operation in ['create', 'upload']:
            new_f_path = os.path.join(f_path.lstrip('/'), new_path)
            try:
                commit_obj = self.rhodecode_vcs_repo.get_commit(commit_id)
                # NOTE(dan): construct whole path without leading /
                file_node = commit_obj.get_node(new_f_path)
                if file_node is not None:
                    path_exist = new_f_path
            except EmptyRepositoryError:
                pass
            except Exception:
                # best-effort existence check: any lookup failure is treated
                # as "path does not exist"
                pass

        return {
            'branch': _branch_name,
            'sha': _sha_commit_id,
            'is_head': is_head,
            'path_exists': path_exist
        }
1154 1155
1155 1156 @LoginRequired()
1156 1157 @HasRepoPermissionAnyDecorator('repository.write', 'repository.admin')
1157 1158 def repo_files_remove_file(self):
1158 1159 _ = self.request.translate
1159 1160 c = self.load_default_context()
1160 1161 commit_id, f_path = self._get_commit_and_path()
1161 1162
1162 1163 self._ensure_not_locked()
1163 1164 _branch_name, _sha_commit_id, is_head = \
1164 1165 self._is_valid_head(commit_id, self.rhodecode_vcs_repo,
1165 1166 landing_ref=self.db_repo.landing_ref_name)
1166 1167
1167 1168 self.forbid_non_head(is_head, f_path)
1168 1169 self.check_branch_permission(_branch_name)
1169 1170
1170 1171 c.commit = self._get_commit_or_redirect(commit_id)
1171 1172 c.file = self._get_filenode_or_redirect(c.commit, f_path)
1172 1173
1173 1174 c.default_message = _(
1174 1175 'Deleted file {} via RhodeCode Enterprise').format(f_path)
1175 1176 c.f_path = f_path
1176 1177
1177 1178 return self._get_template_context(c)
1178 1179
    @LoginRequired()
    @HasRepoPermissionAnyDecorator('repository.write', 'repository.admin')
    @CSRFRequired()
    def repo_files_delete_file(self):
        """Commit the deletion of a single file, then redirect to tip."""
        _ = self.request.translate

        c = self.load_default_context()
        commit_id, f_path = self._get_commit_and_path()

        self._ensure_not_locked()
        _branch_name, _sha_commit_id, is_head = \
            self._is_valid_head(commit_id, self.rhodecode_vcs_repo,
                                landing_ref=self.db_repo.landing_ref_name)

        # deletions are only allowed on a writable branch head
        self.forbid_non_head(is_head, f_path)
        self.check_branch_permission(_branch_name)

        c.commit = self._get_commit_or_redirect(commit_id)
        c.file = self._get_filenode_or_redirect(c.commit, f_path)

        c.default_message = _(
            'Deleted file {} via RhodeCode Enterprise').format(f_path)
        c.f_path = f_path
        node_path = f_path
        author = self._rhodecode_db_user.full_contact
        message = self.request.POST.get('message') or c.default_message
        try:
            # empty content marks the node for deletion
            nodes = {
                safe_bytes(node_path): {
                    'content': b''
                }
            }
            ScmModel().delete_nodes(
                user=self._rhodecode_db_user.user_id, repo=self.db_repo,
                message=message,
                nodes=nodes,
                parent_commit=c.commit,
                author=author,
            )

            h.flash(
                _('Successfully deleted file `{}`').format(
                    h.escape(f_path)), category='success')
        except Exception:
            log.exception('Error during commit operation')
            h.flash(_('Error occurred during commit'), category='error')
        # always redirect to tip, with either success or error flashed
        raise HTTPFound(
            h.route_path('repo_commit', repo_name=self.db_repo_name,
                         commit_id='tip'))
1228 1229
1229 1230 @LoginRequired()
1230 1231 @HasRepoPermissionAnyDecorator('repository.write', 'repository.admin')
1231 1232 def repo_files_edit_file(self):
1232 1233 _ = self.request.translate
1233 1234 c = self.load_default_context()
1234 1235 commit_id, f_path = self._get_commit_and_path()
1235 1236
1236 1237 self._ensure_not_locked()
1237 1238 _branch_name, _sha_commit_id, is_head = \
1238 1239 self._is_valid_head(commit_id, self.rhodecode_vcs_repo,
1239 1240 landing_ref=self.db_repo.landing_ref_name)
1240 1241
1241 1242 self.forbid_non_head(is_head, f_path, commit_id=commit_id)
1242 1243 self.check_branch_permission(_branch_name, commit_id=commit_id)
1243 1244
1244 1245 c.commit = self._get_commit_or_redirect(commit_id)
1245 1246 c.file = self._get_filenode_or_redirect(c.commit, f_path)
1246 1247
1247 1248 if c.file.is_binary:
1248 1249 files_url = h.route_path(
1249 1250 'repo_files',
1250 1251 repo_name=self.db_repo_name,
1251 1252 commit_id=c.commit.raw_id, f_path=f_path)
1252 1253 raise HTTPFound(files_url)
1253 1254
1254 1255 c.default_message = _('Edited file {} via RhodeCode Enterprise').format(f_path)
1255 1256 c.f_path = f_path
1256 1257
1257 1258 return self._get_template_context(c)
1258 1259
    @LoginRequired()
    @HasRepoPermissionAnyDecorator('repository.write', 'repository.admin')
    @CSRFRequired()
    def repo_files_update_file(self):
        """Commit an edit (content change and/or rename) of an existing text file."""
        _ = self.request.translate
        c = self.load_default_context()
        commit_id, f_path = self._get_commit_and_path()

        self._ensure_not_locked()

        c.commit = self._get_commit_or_redirect(commit_id)
        c.file = self._get_filenode_or_redirect(c.commit, f_path)

        if c.file.is_binary:
            # binary files cannot be edited via this endpoint
            raise HTTPFound(h.route_path('repo_files', repo_name=self.db_repo_name,
                            commit_id=c.commit.raw_id, f_path=f_path))

        _branch_name, _sha_commit_id, is_head = \
            self._is_valid_head(commit_id, self.rhodecode_vcs_repo,
                                landing_ref=self.db_repo.landing_ref_name)

        self.forbid_non_head(is_head, f_path, commit_id=commit_id)
        self.check_branch_permission(_branch_name, commit_id=commit_id)

        c.default_message = _('Edited file {} via RhodeCode Enterprise').format(f_path)
        c.f_path = f_path

        old_content = c.file.str_content
        sl = old_content.splitlines(1)
        first_line = sl[0] if sl else ''

        r_post = self.request.POST
        # line endings: 0 - Unix, 1 - Mac, 2 - DOS
        line_ending_mode = detect_mode(first_line, 0)
        # normalize submitted content to the file's detected line endings
        content = convert_line_endings(r_post.get('content', ''), line_ending_mode)

        message = r_post.get('message') or c.default_message

        org_node_path = c.file.str_path
        filename = r_post['filename']

        root_path = c.file.dir_path
        pure_path = self.create_pure_path(root_path, filename)
        node_path = pure_path.as_posix()

        default_redirect_url = h.route_path('repo_commit', repo_name=self.db_repo_name,
                                            commit_id=commit_id)
        if content == old_content and node_path == org_node_path:
            # neither the content nor the filename changed: nothing to commit
            h.flash(_('No changes detected on {}').format(h.escape(org_node_path)),
                    category='warning')
            raise HTTPFound(default_redirect_url)

        try:
            mapping = {
                c.file.bytes_path: {
                    'org_filename': org_node_path,
                    'filename': safe_bytes(node_path),
                    'content': safe_bytes(content),
                    'lexer': '',
                    'op': 'mod',
                    'mode': c.file.mode
                }
            }

            commit = ScmModel().update_nodes(
                user=self._rhodecode_db_user.user_id,
                repo=self.db_repo,
                message=message,
                nodes=mapping,
                parent_commit=c.commit,
            )

            h.flash(_('Successfully committed changes to file `{}`').format(
                h.escape(f_path)), category='success')
            # on success, redirect to the newly created commit instead
            default_redirect_url = h.route_path(
                'repo_commit', repo_name=self.db_repo_name, commit_id=commit.raw_id)

        except Exception:
            log.exception('Error occurred during commit')
            h.flash(_('Error occurred during commit'), category='error')

        raise HTTPFound(default_redirect_url)
1341 1342
    @LoginRequired()
    @HasRepoPermissionAnyDecorator('repository.write', 'repository.admin')
    def repo_files_add_file(self):
        """Render the "add new file" form (also reused for binary upload/replace)."""
        _ = self.request.translate
        c = self.load_default_context()
        commit_id, f_path = self._get_commit_and_path()

        self._ensure_not_locked()

        # Check if we need to use this page to upload binary
        upload_binary = str2bool(self.request.params.get('upload_binary', False))

        c.commit = self._get_commit_or_redirect(commit_id, redirect_after=False)
        if c.commit is None:
            c.commit = EmptyCommit(alias=self.rhodecode_vcs_repo.alias)

        if self.rhodecode_vcs_repo.is_empty():
            # for empty repository we cannot check for current branch, we rely on
            # c.commit.branch instead
            _branch_name, _sha_commit_id, is_head = c.commit.branch, '', True
        else:
            _branch_name, _sha_commit_id, is_head = \
                self._is_valid_head(commit_id, self.rhodecode_vcs_repo,
                                    landing_ref=self.db_repo.landing_ref_name)

        self.forbid_non_head(is_head, f_path, commit_id=commit_id)
        self.check_branch_permission(_branch_name, commit_id=commit_id)

        # binary upload re-uses this form with an "edit" style message
        c.default_message = (_('Added file via RhodeCode Enterprise')) \
            if not upload_binary else (_('Edited file {} via RhodeCode Enterprise').format(f_path))
        c.f_path = f_path.lstrip('/')  # ensure not relative path
        c.replace_binary = upload_binary

        return self._get_template_context(c)
1376 1377
    @LoginRequired()
    @HasRepoPermissionAnyDecorator('repository.write', 'repository.admin')
    @CSRFRequired()
    def repo_files_create_file(self):
        """Commit a brand-new file created from the web form, then redirect."""
        _ = self.request.translate
        c = self.load_default_context()
        commit_id, f_path = self._get_commit_and_path()

        self._ensure_not_locked()

        c.commit = self._get_commit_or_redirect(commit_id, redirect_after=False)
        if c.commit is None:
            c.commit = EmptyCommit(alias=self.rhodecode_vcs_repo.alias)

        # calculate redirect URL
        if self.rhodecode_vcs_repo.is_empty():
            default_redirect_url = h.route_path(
                'repo_summary', repo_name=self.db_repo_name)
        else:
            default_redirect_url = h.route_path(
                'repo_commit', repo_name=self.db_repo_name, commit_id='tip')

        if self.rhodecode_vcs_repo.is_empty():
            # for empty repository we cannot check for current branch, we rely on
            # c.commit.branch instead
            _branch_name, _sha_commit_id, is_head = c.commit.branch, '', True
        else:
            _branch_name, _sha_commit_id, is_head = \
                self._is_valid_head(commit_id, self.rhodecode_vcs_repo,
                                    landing_ref=self.db_repo.landing_ref_name)

        self.forbid_non_head(is_head, f_path, commit_id=commit_id)
        self.check_branch_permission(_branch_name, commit_id=commit_id)

        c.default_message = (_('Added file via RhodeCode Enterprise'))
        c.f_path = f_path

        r_post = self.request.POST
        message = r_post.get('message') or c.default_message
        filename = r_post.get('filename')
        # new files are normalized to Unix line endings (mode 0)
        unix_mode = 0

        if not filename:
            # If there's no commit, redirect to repo summary
            if type(c.commit) is EmptyCommit:
                redirect_url = h.route_path(
                    'repo_summary', repo_name=self.db_repo_name)
            else:
                redirect_url = default_redirect_url
            h.flash(_('No filename specified'), category='warning')
            raise HTTPFound(redirect_url)

        root_path = f_path
        pure_path = self.create_pure_path(root_path, filename)
        node_path = pure_path.as_posix().lstrip('/')

        author = self._rhodecode_db_user.full_contact
        content = convert_line_endings(r_post.get('content', ''), unix_mode)
        nodes = {
            safe_bytes(node_path): {
                'content': safe_bytes(content)
            }
        }

        try:

            commit = ScmModel().create_nodes(
                user=self._rhodecode_db_user.user_id,
                repo=self.db_repo,
                message=message,
                nodes=nodes,
                parent_commit=c.commit,
                author=author,
            )

            h.flash(_('Successfully committed new file `{}`').format(
                h.escape(node_path)), category='success')

            # on success, point the redirect at the new commit
            default_redirect_url = h.route_path(
                'repo_commit', repo_name=self.db_repo_name, commit_id=commit.raw_id)

        except NonRelativePathError:
            log.exception('Non Relative path found')
            h.flash(_('The location specified must be a relative path and must not '
                      'contain .. in the path'), category='warning')
            raise HTTPFound(default_redirect_url)
        except (NodeError, NodeAlreadyExistsError) as e:
            h.flash(h.escape(safe_str(e)), category='error')
        except Exception:
            log.exception('Error occurred during commit')
            h.flash(_('Error occurred during commit'), category='error')

        raise HTTPFound(default_redirect_url)
1470 1471
1471 1472 @LoginRequired()
1472 1473 @HasRepoPermissionAnyDecorator('repository.write', 'repository.admin')
1473 1474 @CSRFRequired()
1474 1475 def repo_files_upload_file(self):
1475 1476 _ = self.request.translate
1476 1477 c = self.load_default_context()
1477 1478 commit_id, f_path = self._get_commit_and_path()
1478 1479
1479 1480 self._ensure_not_locked()
1480 1481
1481 1482 c.commit = self._get_commit_or_redirect(commit_id, redirect_after=False)
1482 1483 if c.commit is None:
1483 1484 c.commit = EmptyCommit(alias=self.rhodecode_vcs_repo.alias)
1484 1485
1485 1486 # calculate redirect URL
1486 1487 if self.rhodecode_vcs_repo.is_empty():
1487 1488 default_redirect_url = h.route_path(
1488 1489 'repo_summary', repo_name=self.db_repo_name)
1489 1490 else:
1490 1491 default_redirect_url = h.route_path(
1491 1492 'repo_commit', repo_name=self.db_repo_name, commit_id='tip')
1492 1493
1493 1494 if self.rhodecode_vcs_repo.is_empty():
1494 1495 # for empty repository we cannot check for current branch, we rely on
1495 1496 # c.commit.branch instead
1496 1497 _branch_name, _sha_commit_id, is_head = c.commit.branch, '', True
1497 1498 else:
1498 1499 _branch_name, _sha_commit_id, is_head = \
1499 1500 self._is_valid_head(commit_id, self.rhodecode_vcs_repo,
1500 1501 landing_ref=self.db_repo.landing_ref_name)
1501 1502
1502 1503 error = self.forbid_non_head(is_head, f_path, json_mode=True)
1503 1504 if error:
1504 1505 return {
1505 1506 'error': error,
1506 1507 'redirect_url': default_redirect_url
1507 1508 }
1508 1509 error = self.check_branch_permission(_branch_name, json_mode=True)
1509 1510 if error:
1510 1511 return {
1511 1512 'error': error,
1512 1513 'redirect_url': default_redirect_url
1513 1514 }
1514 1515
1515 1516 c.default_message = (_('Added file via RhodeCode Enterprise'))
1516 1517 c.f_path = f_path
1517 1518
1518 1519 r_post = self.request.POST
1519 1520
1520 1521 message = c.default_message
1521 1522 user_message = r_post.getall('message')
1522 1523 if isinstance(user_message, list) and user_message:
1523 1524 # we take the first from duplicated results if it's not empty
1524 1525 message = user_message[0] if user_message[0] else message
1525 1526
1526 1527 nodes = {}
1527 1528
1528 1529 for file_obj in r_post.getall('files_upload') or []:
1529 1530 content = file_obj.file
1530 1531 filename = file_obj.filename
1531 1532
1532 1533 root_path = f_path
1533 1534 pure_path = self.create_pure_path(root_path, filename)
1534 1535 node_path = pure_path.as_posix().lstrip('/')
1535 1536
1536 1537 nodes[safe_bytes(node_path)] = {
1537 1538 'content': content
1538 1539 }
1539 1540
1540 1541 if not nodes:
1541 1542 error = 'missing files'
1542 1543 return {
1543 1544 'error': error,
1544 1545 'redirect_url': default_redirect_url
1545 1546 }
1546 1547
1547 1548 author = self._rhodecode_db_user.full_contact
1548 1549
1549 1550 try:
1550 1551 commit = ScmModel().create_nodes(
1551 1552 user=self._rhodecode_db_user.user_id,
1552 1553 repo=self.db_repo,
1553 1554 message=message,
1554 1555 nodes=nodes,
1555 1556 parent_commit=c.commit,
1556 1557 author=author,
1557 1558 )
1558 1559 if len(nodes) == 1:
1559 1560 flash_message = _('Successfully committed {} new files').format(len(nodes))
1560 1561 else:
1561 1562 flash_message = _('Successfully committed 1 new file')
1562 1563
1563 1564 h.flash(flash_message, category='success')
1564 1565
1565 1566 default_redirect_url = h.route_path(
1566 1567 'repo_commit', repo_name=self.db_repo_name, commit_id=commit.raw_id)
1567 1568
1568 1569 except NonRelativePathError:
1569 1570 log.exception('Non Relative path found')
1570 1571 error = _('The location specified must be a relative path and must not '
1571 1572 'contain .. in the path')
1572 1573 h.flash(error, category='warning')
1573 1574
1574 1575 return {
1575 1576 'error': error,
1576 1577 'redirect_url': default_redirect_url
1577 1578 }
1578 1579 except (NodeError, NodeAlreadyExistsError) as e:
1579 1580 error = h.escape(e)
1580 1581 h.flash(error, category='error')
1581 1582
1582 1583 return {
1583 1584 'error': error,
1584 1585 'redirect_url': default_redirect_url
1585 1586 }
1586 1587 except Exception:
1587 1588 log.exception('Error occurred during commit')
1588 1589 error = _('Error occurred during commit')
1589 1590 h.flash(error, category='error')
1590 1591 return {
1591 1592 'error': error,
1592 1593 'redirect_url': default_redirect_url
1593 1594 }
1594 1595
1595 1596 return {
1596 1597 'error': None,
1597 1598 'redirect_url': default_redirect_url
1598 1599 }
1599 1600
    @LoginRequired()
    @HasRepoPermissionAnyDecorator('repository.write', 'repository.admin')
    @CSRFRequired()
    def repo_files_replace_file(self):
        """Replace the content of an existing file with one uploaded file.

        Returns a JSON dict with ``error`` (None on success) and
        ``redirect_url``.
        """
        _ = self.request.translate
        c = self.load_default_context()
        commit_id, f_path = self._get_commit_and_path()

        self._ensure_not_locked()

        c.commit = self._get_commit_or_redirect(commit_id, redirect_after=False)
        if c.commit is None:
            c.commit = EmptyCommit(alias=self.rhodecode_vcs_repo.alias)

        # calculate fallback redirect URL
        if self.rhodecode_vcs_repo.is_empty():
            default_redirect_url = h.route_path(
                'repo_summary', repo_name=self.db_repo_name)
        else:
            default_redirect_url = h.route_path(
                'repo_commit', repo_name=self.db_repo_name, commit_id='tip')

        if self.rhodecode_vcs_repo.is_empty():
            # for empty repository we cannot check for current branch, we rely on
            # c.commit.branch instead
            _branch_name, _sha_commit_id, is_head = c.commit.branch, '', True
        else:
            _branch_name, _sha_commit_id, is_head = \
                self._is_valid_head(commit_id, self.rhodecode_vcs_repo,
                                    landing_ref=self.db_repo.landing_ref_name)

        error = self.forbid_non_head(is_head, f_path, json_mode=True)
        if error:
            return {
                'error': error,
                'redirect_url': default_redirect_url
            }
        error = self.check_branch_permission(_branch_name, json_mode=True)
        if error:
            return {
                'error': error,
                'redirect_url': default_redirect_url
            }

        c.default_message = (_('Edited file {} via RhodeCode Enterprise').format(f_path))
        c.f_path = f_path

        r_post = self.request.POST

        message = c.default_message
        user_message = r_post.getall('message')
        if isinstance(user_message, list) and user_message:
            # we take the first from duplicated results if it's not empty
            message = user_message[0] if user_message[0] else message

        data_for_replacement = r_post.getall('files_upload') or []
        # exactly one uploaded file is required for a replacement
        if (objects_count := len(data_for_replacement)) > 1:
            return {
                'error': 'too many files for replacement',
                'redirect_url': default_redirect_url
            }
        elif not objects_count:
            return {
                'error': 'missing files',
                'redirect_url': default_redirect_url
            }

        content = data_for_replacement[0].file
        retrieved_filename = data_for_replacement[0].filename

        # the replacement must keep the original file's extension
        if retrieved_filename.split('.')[-1] != f_path.split('.')[-1]:
            return {
                'error': 'file extension of uploaded file doesn\'t match an original file\'s extension',
                'redirect_url': default_redirect_url
            }

        author = self._rhodecode_db_user.full_contact

        try:
            commit = ScmModel().update_binary_node(
                user=self._rhodecode_db_user.user_id,
                repo=self.db_repo,
                message=message,
                node={
                    'content': content,
                    'file_path': f_path.encode(),
                },
                parent_commit=c.commit,
                author=author,
            )

            h.flash(_('Successfully committed 1 new file'), category='success')

            # on success, redirect to the freshly created commit
            default_redirect_url = h.route_path(
                'repo_commit', repo_name=self.db_repo_name, commit_id=commit.raw_id)

        except (NodeError, NodeAlreadyExistsError) as e:
            error = h.escape(e)
            h.flash(error, category='error')

            return {
                'error': error,
                'redirect_url': default_redirect_url
            }
        except Exception:
            log.exception('Error occurred during commit')
            error = _('Error occurred during commit')
            h.flash(error, category='error')
            return {
                'error': error,
                'redirect_url': default_redirect_url
            }

        return {
            'error': None,
            'redirect_url': default_redirect_url
        }
@@ -1,29 +1,29 b''
1 1 # Copyright (C) 2015-2024 RhodeCode GmbH
2 2 #
3 3 # This program is free software: you can redistribute it and/or modify
4 4 # it under the terms of the GNU Affero General Public License, version 3
5 5 # (only), as published by the Free Software Foundation.
6 6 #
7 7 # This program is distributed in the hope that it will be useful,
8 8 # but WITHOUT ANY WARRANTY; without even the implied warranty of
9 9 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 10 # GNU General Public License for more details.
11 11 #
12 12 # You should have received a copy of the GNU Affero General Public License
13 13 # along with this program. If not, see <http://www.gnu.org/licenses/>.
14 14 #
15 15 # This program is dual-licensed. If you wish to learn more about the
16 16 # RhodeCode Enterprise Edition, including its added features, Support services,
17 17 # and proprietary license terms, please see https://rhodecode.com/licenses/
18 18
19 19 from .fanout_cache import get_archival_cache_store
20 20 from .fanout_cache import get_archival_config
21 21
22 22 from .utils import archive_iterator
23 from .utils import ArchiveCacheLock
23 from .utils import ArchiveCacheGenerationLock
24 24
25 25
26 26 def includeme(config):
27 27 # init our cache at start
28 28 settings = config.get_settings()
29 29 get_archival_cache_store(settings)
@@ -1,427 +1,449 b''
1 1 # Copyright (C) 2015-2024 RhodeCode GmbH
2 2 #
3 3 # This program is free software: you can redistribute it and/or modify
4 4 # it under the terms of the GNU Affero General Public License, version 3
5 5 # (only), as published by the Free Software Foundation.
6 6 #
7 7 # This program is distributed in the hope that it will be useful,
8 8 # but WITHOUT ANY WARRANTY; without even the implied warranty of
9 9 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 10 # GNU General Public License for more details.
11 11 #
12 12 # You should have received a copy of the GNU Affero General Public License
13 13 # along with this program. If not, see <http://www.gnu.org/licenses/>.
14 14 #
15 15 # This program is dual-licensed. If you wish to learn more about the
16 16 # RhodeCode Enterprise Edition, including its added features, Support services,
17 17 # and proprietary license terms, please see https://rhodecode.com/licenses/
18 18
19 19 import codecs
20 20 import contextlib
21 21 import functools
22 22 import os
23 23 import logging
24 24 import time
25 25 import typing
26 26 import zlib
27 27 import sqlite3
28 28
29 29 from ...ext_json import json
30 30 from .lock import GenerationLock
31 31 from .utils import format_size
32 32
33 33 log = logging.getLogger(__name__)
34 34
35 35 cache_meta = None
36 36
37 37 UNKNOWN = -241
38 38 NO_VAL = -917
39 39
40 40 MODE_BINARY = 'BINARY'
41 41
42 42
43 43 EVICTION_POLICY = {
44 44 'none': {
45 45 'evict': None,
46 46 },
47 47 'least-recently-stored': {
48 48 'evict': 'SELECT {fields} FROM archive_cache ORDER BY store_time',
49 49 },
50 50 'least-recently-used': {
51 51 'evict': 'SELECT {fields} FROM archive_cache ORDER BY access_time',
52 52 },
53 53 'least-frequently-used': {
54 54 'evict': 'SELECT {fields} FROM archive_cache ORDER BY access_count',
55 55 },
56 56 }
57 57
58 58
59 59 class DB:
60 60
61 61 def __init__(self):
62 62 self.connection = sqlite3.connect(':memory:')
63 63 self._init_db()
64 64
65 65 def _init_db(self):
66 66 qry = '''
67 67 CREATE TABLE IF NOT EXISTS archive_cache (
68 68 rowid INTEGER PRIMARY KEY,
69 69 key_file TEXT,
70 70 key_file_path TEXT,
71 71 filename TEXT,
72 72 full_path TEXT,
73 73 store_time REAL,
74 74 access_time REAL,
75 75 access_count INTEGER DEFAULT 0,
76 76 size INTEGER DEFAULT 0
77 77 )
78 78 '''
79 79
80 80 self.sql(qry)
81 81 self.connection.commit()
82 82
83 83 @property
84 84 def sql(self):
85 85 return self.connection.execute
86 86
87 87 def bulk_insert(self, rows):
88 88 qry = '''
89 89 INSERT INTO archive_cache (
90 90 rowid,
91 91 key_file,
92 92 key_file_path,
93 93 filename,
94 94 full_path,
95 95 store_time,
96 96 access_time,
97 97 access_count,
98 98 size
99 99 )
100 100 VALUES (
101 101 ?, ?, ?, ?, ?, ?, ?, ?, ?
102 102 )
103 103 '''
104 104 cursor = self.connection.cursor()
105 105 cursor.executemany(qry, rows)
106 106 self.connection.commit()
107 107
108 108
109 109 class FileSystemCache:
110 110
111 111 def __init__(self, index, directory, **settings):
112 112 self._index = index
113 113 self._directory = directory
114 114
115 @property
116 def directory(self):
117 """Cache directory."""
118 return self._directory
119
115 120 def _write_file(self, full_path, iterator, mode, encoding=None):
116 121 full_dir, _ = os.path.split(full_path)
117 122
118 123 for count in range(1, 11):
119 124 with contextlib.suppress(OSError):
120 125 os.makedirs(full_dir)
121 126
122 127 try:
123 128 # Another cache may have deleted the directory before
124 129 # the file could be opened.
125 130 writer = open(full_path, mode, encoding=encoding)
126 131 except OSError:
127 132 if count == 10:
128 133 # Give up after 10 tries to open the file.
129 134 raise
130 135 continue
131 136
132 137 with writer:
133 138 size = 0
134 139 for chunk in iterator:
135 140 size += len(chunk)
136 141 writer.write(chunk)
137 142 return size
138 143
139 144 def _get_keyfile(self, key):
140 145 return os.path.join(self._directory, f'{key}.key')
141 146
142 147 def store(self, key, value_reader, metadata):
143 148 filename, full_path = self.random_filename()
144 149 key_file = self._get_keyfile(key)
145 150
146 151 # STORE METADATA
147 152 _metadata = {
148 153 "version": "v1",
149 154 "filename": filename,
150 155 "full_path": full_path,
151 156 "key_file": key_file,
152 157 "store_time": time.time(),
153 158 "access_count": 1,
154 159 "access_time": 0,
155 160 "size": 0
156 161 }
157 162 if metadata:
158 163 _metadata.update(metadata)
159 164
160 165 reader = functools.partial(value_reader.read, 2**22)
161 166
162 167 iterator = iter(reader, b'')
163 168 size = self._write_file(full_path, iterator, 'xb')
164 169 metadata['size'] = size
165 170
166 171 # after archive is finished, we create a key to save the presence of the binary file
167 172 with open(key_file, 'wb') as f:
168 173 f.write(json.dumps(_metadata))
169 174
170 175 return key, size, MODE_BINARY, filename, _metadata
171 176
172 def fetch(self, key) -> tuple[typing.BinaryIO, dict]:
177 def fetch(self, key, retry=False, retry_attempts=10) -> tuple[typing.BinaryIO, dict]:
178
179 if retry:
180 for attempt in range(retry_attempts):
181 if key in self:
182 break
 183 # we didn't find the key, wait 1s, and re-check
184 time.sleep(1)
185
173 186 if key not in self:
 187 log.exception('requested %s not found in %s', key, self)
174 188 raise KeyError(key)
175 189
176 190 key_file = self._get_keyfile(key)
177 191 with open(key_file, 'rb') as f:
178 192 metadata = json.loads(f.read())
179 193
180 194 filename = metadata['filename']
181 195
182 196 try:
183 return open(os.path.join(self._directory, filename), 'rb'), metadata
197 return open(os.path.join(self.directory, filename), 'rb'), metadata
184 198 finally:
185 199 # update usage stats, count and accessed
186 200 metadata["access_count"] = metadata.get("access_count", 0) + 1
187 201 metadata["access_time"] = time.time()
188 202
189 203 with open(key_file, 'wb') as f:
190 204 f.write(json.dumps(metadata))
191 205
192 206 def random_filename(self):
193 207 """Return filename and full-path tuple for file storage.
194 208
195 209 Filename will be a randomly generated 28 character hexadecimal string
196 210 with ".archive_cache" suffixed. Two levels of sub-directories will be used to
197 211 reduce the size of directories. On older filesystems, lookups in
198 212 directories with many files may be slow.
199 213 """
200 214
201 215 hex_name = codecs.encode(os.urandom(16), 'hex').decode('utf-8')
202 216 sub_dir = os.path.join(hex_name[:2], hex_name[2:4])
203 217 name = hex_name[4:] + '.archive_cache'
204 218 filename = os.path.join(sub_dir, name)
205 full_path = os.path.join(self._directory, filename)
219 full_path = os.path.join(self.directory, filename)
206 220 return filename, full_path
207 221
208 222 def hash(self, key):
209 223 """Compute portable hash for `key`.
210 224
211 225 :param key: key to hash
212 226 :return: hash value
213 227
214 228 """
215 229 mask = 0xFFFFFFFF
216 230 return zlib.adler32(key.encode('utf-8')) & mask # noqa
217 231
218 232 def __contains__(self, key):
219 233 """Return `True` if `key` matching item is found in cache.
220 234
221 235 :param key: key matching item
222 236 :return: True if key matching item
223 237
224 238 """
225 239 key_file = self._get_keyfile(key)
226 240 return os.path.exists(key_file)
227 241
242 def __repr__(self):
243 return f'FileSystemCache(index={self._index}, dir={self.directory})'
244
228 245
229 246 class FanoutCache:
230 247 """Cache that shards keys and values."""
231 248
232 249 def __init__(
233 250 self, directory=None, **settings
234 251 ):
235 252 """Initialize cache instance.
236 253
237 254 :param str directory: cache directory
238 255 :param settings: settings dict
239 256
240 257 """
241 258 if directory is None:
242 259 raise ValueError('directory cannot be None')
243 260
244 261 directory = str(directory)
245 262 directory = os.path.expanduser(directory)
246 263 directory = os.path.expandvars(directory)
247 264 self._directory = directory
248 265
249 266 self._count = settings.pop('cache_shards')
250 267 self._locking_url = settings.pop('locking_url')
251 268
252 269 self._eviction_policy = settings['cache_eviction_policy']
253 270 self._cache_size_limit = settings['cache_size_limit']
254 271
255 272 self._shards = tuple(
256 273 FileSystemCache(
257 274 index=num,
258 275 directory=os.path.join(directory, 'shard_%03d' % num),
259 276 **settings,
260 277 )
261 278 for num in range(self._count)
262 279 )
263 280 self._hash = self._shards[0].hash
264 281
282 @property
283 def directory(self):
284 """Cache directory."""
285 return self._directory
286
265 287 def get_lock(self, lock_key):
266 288 return GenerationLock(lock_key, self._locking_url)
267 289
268 290 def _get_shard(self, key) -> FileSystemCache:
269 291 index = self._hash(key) % self._count
270 292 shard = self._shards[index]
271 293 return shard
272 294
273 295 def store(self, key, value_reader, metadata=None):
274 296 shard = self._get_shard(key)
275 297 return shard.store(key, value_reader, metadata)
276 298
277 def fetch(self, key):
299 def fetch(self, key, retry=False, retry_attempts=10):
278 300 """Return file handle corresponding to `key` from cache.
279 301 """
280 302 shard = self._get_shard(key)
281 return shard.fetch(key)
303 return shard.fetch(key, retry=retry, retry_attempts=retry_attempts)
282 304
283 305 def has_key(self, key):
284 306 """Return `True` if `key` matching item is found in cache.
285 307
286 308 :param key: key for item
287 309 :return: True if key is found
288 310
289 311 """
290 312 shard = self._get_shard(key)
291 313 return key in shard
292 314
293 315 def __contains__(self, item):
294 316 return self.has_key(item)
295 317
296 318 def evict(self, policy=None, size_limit=None):
297 319 """
298 320 Remove old items based on the conditions
299 321
300 322
301 323 explanation of this algo:
302 324 iterate over each shard, then for each shard iterate over the .key files
303 325 read the key files metadata stored. This gives us a full list of keys, cached_archived, their size and
304 326 access data, time creation, and access counts.
305 327
306 328 Store that into a memory DB so we can run different sorting strategies easily.
307 329 Summing the size is a sum sql query.
308 330
309 331 Then we run a sorting strategy based on eviction policy.
310 332 We iterate over sorted keys, and remove each checking if we hit the overall limit.
311 333 """
312 334
313 335 policy = policy or self._eviction_policy
314 336 size_limit = size_limit or self._cache_size_limit
315 337
316 338 select_policy = EVICTION_POLICY[policy]['evict']
317 339
318 340 log.debug('Running eviction policy \'%s\', and checking for size limit: %s',
319 341 policy, format_size(size_limit))
320 342
321 343 if select_policy is None:
322 344 return 0
323 345
324 346 db = DB()
325 347
326 348 data = []
327 349 cnt = 1
328 350 for shard in self._shards:
329 for key_file in os.listdir(shard._directory):
351 for key_file in os.listdir(shard.directory):
330 352 if key_file.endswith('.key'):
331 key_file_path = os.path.join(shard._directory, key_file)
353 key_file_path = os.path.join(shard.directory, key_file)
332 354 with open(key_file_path, 'rb') as f:
333 355 metadata = json.loads(f.read())
334 356
335 357 size = metadata.get('size')
336 358 filename = metadata.get('filename')
337 359 full_path = metadata.get('full_path')
338 360
339 361 if not size:
340 362 # in case we don't have size re-calc it...
341 363 size = os.stat(full_path).st_size
342 364
343 365 data.append([
344 366 cnt,
345 367 key_file,
346 368 key_file_path,
347 369 filename,
348 370 full_path,
349 371 metadata.get('store_time', 0),
350 372 metadata.get('access_time', 0),
351 373 metadata.get('access_count', 0),
352 374 size,
353 375 ])
354 376 cnt += 1
355 377
356 378 # Insert bulk data using executemany
357 379 db.bulk_insert(data)
358 380
359 381 ((total_size,),) = db.sql('SELECT COALESCE(SUM(size), 0) FROM archive_cache').fetchall()
360 382 log.debug('Analyzed %s keys, occupied: %s', len(data), format_size(total_size))
361 383 select_policy_qry = select_policy.format(fields='key_file_path, full_path, size')
362 384 sorted_keys = db.sql(select_policy_qry).fetchall()
363 385
364 386 removed_items = 0
365 387 removed_size = 0
366 388 for key, cached_file, size in sorted_keys:
367 389 # simulate removal impact BEFORE removal
368 390 total_size -= size
369 391
370 392 if total_size <= size_limit:
371 393 # we obtained what we wanted...
372 394 break
373 395
374 396 os.remove(cached_file)
375 397 os.remove(key)
376 398 removed_items += 1
377 399 removed_size += size
378 400
379 401 log.debug('Removed %s cache archives, and reduced size: %s', removed_items, format_size(removed_size))
380 402 return removed_items
381 403
382 404
383 405 def get_archival_config(config):
384 406
385 407 final_config = {
386 408
387 409 }
388 410
389 411 for k, v in config.items():
390 412 if k.startswith('archive_cache'):
391 413 final_config[k] = v
392 414
393 415 return final_config
394 416
395 417
396 418 def get_archival_cache_store(config):
397 419
398 420 global cache_meta
399 421 if cache_meta is not None:
400 422 return cache_meta
401 423
402 424 config = get_archival_config(config)
403 425 backend = config['archive_cache.backend.type']
404 426 if backend != 'filesystem':
405 427 raise ValueError('archive_cache.backend.type only supports "filesystem"')
406 428
407 429 archive_cache_locking_url = config['archive_cache.locking.url']
408 430 archive_cache_dir = config['archive_cache.filesystem.store_dir']
409 431 archive_cache_size_gb = config['archive_cache.filesystem.cache_size_gb']
410 432 archive_cache_shards = config['archive_cache.filesystem.cache_shards']
411 433 archive_cache_eviction_policy = config['archive_cache.filesystem.eviction_policy']
412 434
413 435 log.debug('Initializing archival cache instance under %s', archive_cache_dir)
414 436
415 437 # check if it's ok to write, and re-create the archive cache
416 438 if not os.path.isdir(archive_cache_dir):
417 439 os.makedirs(archive_cache_dir, exist_ok=True)
418 440
419 441 d_cache = FanoutCache(
420 442 archive_cache_dir,
421 443 locking_url=archive_cache_locking_url,
422 444 cache_shards=archive_cache_shards,
423 445 cache_size_limit=archive_cache_size_gb * 1024 * 1024 * 1024,
424 446 cache_eviction_policy=archive_cache_eviction_policy
425 447 )
426 448 cache_meta = d_cache
427 449 return cache_meta
@@ -1,59 +1,59 b''
1 1 # Copyright (C) 2015-2024 RhodeCode GmbH
2 2 #
3 3 # This program is free software: you can redistribute it and/or modify
4 4 # it under the terms of the GNU Affero General Public License, version 3
5 5 # (only), as published by the Free Software Foundation.
6 6 #
7 7 # This program is distributed in the hope that it will be useful,
8 8 # but WITHOUT ANY WARRANTY; without even the implied warranty of
9 9 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 10 # GNU General Public License for more details.
11 11 #
12 12 # You should have received a copy of the GNU Affero General Public License
13 13 # along with this program. If not, see <http://www.gnu.org/licenses/>.
14 14 #
15 15 # This program is dual-licensed. If you wish to learn more about the
16 16 # RhodeCode Enterprise Edition, including its added features, Support services,
17 17 # and proprietary license terms, please see https://rhodecode.com/licenses/
18 18
19 19 import redis
20 20 from ..._vendor import redis_lock
21 from .utils import ArchiveCacheLock
21 from .utils import ArchiveCacheGenerationLock
22 22
23 23
24 24 class GenerationLock:
25 25 """
26 26 Locking mechanism that detects if a lock is acquired
27 27
28 28 with GenerationLock(lock_key):
29 29 compute_archive()
30 30 """
31 31 lock_timeout = 7200
32 32
33 33 def __init__(self, lock_key, url):
34 34 self.lock_key = lock_key
35 35 self._create_client(url)
36 36 self.lock = self.get_lock()
37 37
38 38 def _create_client(self, url):
39 39 connection_pool = redis.ConnectionPool.from_url(url)
40 40 self.writer_client = redis.StrictRedis(
41 41 connection_pool=connection_pool
42 42 )
43 43 self.reader_client = self.writer_client
44 44
45 45 def get_lock(self):
46 46 return redis_lock.Lock(
47 47 redis_client=self.writer_client,
48 48 name=self.lock_key,
49 49 expire=self.lock_timeout,
50 50 strict=True
51 51 )
52 52
53 53 def __enter__(self):
54 54 acquired = self.lock.acquire(blocking=False)
55 55 if not acquired:
56 raise ArchiveCacheLock('Failed to create a lock')
56 raise ArchiveCacheGenerationLock('Failed to create a lock')
57 57
58 58 def __exit__(self, exc_type, exc_val, exc_tb):
59 59 self.lock.release()
@@ -1,72 +1,72 b''
1 1 # Copyright (C) 2015-2024 RhodeCode GmbH
2 2 #
3 3 # This program is free software: you can redistribute it and/or modify
4 4 # it under the terms of the GNU Affero General Public License, version 3
5 5 # (only), as published by the Free Software Foundation.
6 6 #
7 7 # This program is distributed in the hope that it will be useful,
8 8 # but WITHOUT ANY WARRANTY; without even the implied warranty of
9 9 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 10 # GNU General Public License for more details.
11 11 #
12 12 # You should have received a copy of the GNU Affero General Public License
13 13 # along with this program. If not, see <http://www.gnu.org/licenses/>.
14 14 #
15 15 # This program is dual-licensed. If you wish to learn more about the
16 16 # RhodeCode Enterprise Edition, including its added features, Support services,
17 17 # and proprietary license terms, please see https://rhodecode.com/licenses/
18 18
19 19 import os
20 20
21 21
22 class ArchiveCacheLock(Exception):
22 class ArchiveCacheGenerationLock(Exception):
23 23 pass
24 24
25 25
26 26 def archive_iterator(_reader, block_size: int = 4096 * 512):
 27 27 # 4096 * 512 = 2MB
28 28 while 1:
29 29 data = _reader.read(block_size)
30 30 if not data:
31 31 break
32 32 yield data
33 33
34 34
35 35 def get_directory_statistics(start_path):
36 36 """
37 37 total_files, total_size, directory_stats = get_directory_statistics(start_path)
38 38
39 39 print(f"Directory statistics for: {start_path}\n")
40 40 print(f"Total files: {total_files}")
41 41 print(f"Total size: {format_size(total_size)}\n")
42 42
43 43 :param start_path:
44 44 :return:
45 45 """
46 46
47 47 total_files = 0
48 48 total_size = 0
49 49 directory_stats = {}
50 50
51 51 for dir_path, dir_names, file_names in os.walk(start_path):
52 52 dir_size = 0
53 53 file_count = len(file_names)
54 54
55 55 for file in file_names:
56 56 filepath = os.path.join(dir_path, file)
57 57 file_size = os.path.getsize(filepath)
58 58 dir_size += file_size
59 59
60 60 directory_stats[dir_path] = {'file_count': file_count, 'size': dir_size}
61 61 total_files += file_count
62 62 total_size += dir_size
63 63
64 64 return total_files, total_size, directory_stats
65 65
66 66
67 67 def format_size(size):
68 68 # Convert size in bytes to a human-readable format (e.g., KB, MB, GB)
69 69 for unit in ['B', 'KB', 'MB', 'GB', 'TB']:
70 70 if size < 1024:
71 71 return f"{size:.2f} {unit}"
72 72 size /= 1024
General Comments 0
You need to be logged in to leave comments. Login now