Skip to content

Commit

Permalink
use bulk operations
Browse files Browse the repository at this point in the history
  • Loading branch information
escattone committed May 3, 2024
1 parent a6a7ff7 commit 7a7a4f5
Show file tree
Hide file tree
Showing 2 changed files with 86 additions and 23 deletions.
54 changes: 40 additions & 14 deletions kitsune/dashboards/models.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import logging

from django.db import IntegrityError, models, transaction
from django.db.models import Subquery
from django.db.models import Q, Subquery
from django.utils.translation import gettext_lazy as _lazy

from kitsune.dashboards import PERIODS
Expand Down Expand Up @@ -39,22 +39,48 @@ def reload_period_from_analytics(cls, period, verbose=False):
if verbose:
log.info(f"Creating fresh instances of {cls.__name__} with period = {period}...")

# We're sacrificing memory here in order to reduce the number of database queries.
locale_and_slug_pairs = Q()
instance_by_locale_and_slug = {}
for (locale, slug), visits in googleanalytics.pageviews_by_document(
period, verbose=verbose
):
try:
with transaction.atomic():
cls.objects.create(
document_id=Subquery(
Document.objects.filter(locale=locale, slug=slug).values("id")
),
period=period,
visits=visits,
)
except IntegrityError:
# We've already rolled back the bad insertion, which was due to the
# fact that the document no longer exists, so let's move on.
pass
locale_and_slug_pairs |= Q(locale=locale, slug=slug)
instance_by_locale_and_slug[(locale, slug)] = cls(
document_id=Subquery(
Document.objects.filter(locale=locale, slug=slug).values("id")
),
period=period,
visits=visits,
)

# Make this a function, so we can repeat it if necessary.
def bulk_create():
"""
Create all of the instances in one shot, but exclude instances that refer
to a non-existing Document, so we avoid triggering an integrity error. A
call to this function makes only two databases queries no matter how many
instances we need to validate and create.
"""
cls.objects.bulk_create(
[
instance_by_locale_and_slug[locale_and_slug]
for locale_and_slug in Document.objects.filter(
locale_and_slug_pairs
).values_list("locale", "slug")
]
)

try:
with transaction.atomic():
bulk_create()
except IntegrityError:
# There is a very slim chance that one or more Documents are deleted in the
# moment of time between the creation of the list of valid instances and the
# call to cls.objects.bulk_create, so let's give it one more try, assuming
# there's an even slimmer chance that lightning will strike twice in a row.
# If this one fails, we'll roll-back the entire effort.
bulk_create()


L10N_TOP20_CODE = "percent_localized_top20"
Expand Down
55 changes: 46 additions & 9 deletions kitsune/questions/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,8 @@
from django.contrib.contenttypes.fields import GenericForeignKey, GenericRelation
from django.contrib.contenttypes.models import ContentType
from django.core.cache import cache
from django.db import IntegrityError, models
from django.db.models import Count, Subquery
from django.db import IntegrityError, models, transaction
from django.db.models import Count
from django.db.models.functions import Now
from django.db.models.signals import post_save
from django.dispatch import receiver
Expand Down Expand Up @@ -732,15 +732,52 @@ def reload_from_analytics(cls, verbose=False):
"""Update the stats from Google Analytics."""
from kitsune.sumo import googleanalytics

for question_id, visits in googleanalytics.pageviews_by_question(verbose=verbose):
try:
cls.objects.update_or_create(
question_id=Subquery(Question.objects.filter(id=question_id).values("id")),
defaults=dict(visits=visits),
with transaction.atomic():
# First, let's gather some data we need. We're sacrificing memory
# here in order to reduce the number of database queries later on.
question_ids = set()
instance_by_id = {}
for question_id, visits in googleanalytics.pageviews_by_question(verbose=verbose):
question_ids.add(question_id)
instance_by_id[question_id] = cls(question_id=question_id, visits=visits)

# Next, let's clear out the stale instances that have new results.
if verbose:
log.info(f"Deleting all stale instances of {cls.__name__}...")

cls.objects.filter(question_id__in=question_ids).delete()

# Then we can create fresh instances for the questions that have results.
if verbose:
log.info(f"Creating fresh instances of {cls.__name__}...")

# Make this a function, so we can repeat it if necessary.
def bulk_create():
"""
Create all of the instances in one shot, but exclude instances that refer
to a non-existing Question, so we avoid triggering an integrity error. A
call to this function makes only two databases queries no matter how many
instances we need to validate and create.
"""
cls.objects.bulk_create(
[
instance_by_id[id]
for id in Question.objects.filter(question__in=question_ids).values_list(
"id", flat=True
)
]
)

try:
with transaction.atomic():
bulk_create()
except IntegrityError:
# Skip the update-or-create if the question no longer exists.
pass
# There is a very slim chance that one or more Questions are deleted in the
# moment of time between the creation of the list of valid instances and the
# call to cls.objects.bulk_create, so let's give it one more try, assuming
# there's an even slimmer chance that lightning will strike twice in a row.
# If this one fails, we'll roll-back the entire effort.
bulk_create()


class QuestionLocale(ModelBase):
Expand Down

0 comments on commit 7a7a4f5

Please sign in to comment.