mycampusscript.py: investigate_jsonlog() reverse-engineering helper function

704d4108 · prechelt · d9640ae2 · 704d4108
Commit 704d4108 authored 6 months ago by prechelt
--- a/mycampusscript.py
+++ b/mycampusscript.py
+"""
+Works roughly as follows:
+1. Overall, we script a POST request (RQmain) to a single, complex form to create a Sakai Assignment.
+2. However, the form includes one or more uploaded documents and those uploads (must) happen upfront
+   (RQupload).
+3. Worse, the overall form must be sent once even before the uploads (RQmain.0)!
+   At least that is what happens when one opens the upload dialog. A symptom is the fact that one
+   cannot open the upload dialog unless one has entered a title for the Assignment.
+4. This appears to be needed because the URL created for each uploaded document contains an ID
+   for the assignment.
+5. Reverse engineering the details of this is difficult, because GETting the page on which the form resides
+   (RQmain.get) involves about 40 requests.
+6. Therefore, we do not yet know where the identity of the Assignment freshly created in step 3
+   is to be found. Our best guess is: Somewhere in the resulting HTML page.
+7. Likewise for the URLs of the uploaded documents: They have to be sent as part of the big
+   (>80 parameters) RQmain request, but the URL is not a main outcome of RQupload.   
+8. Although the upload dialog is a separate window on top of the RQmain form, RQupload does not
+   only close that window, it also reloads the RQmain page, presumably to include the freshly uploaded
+   file in the list of files shown thereon.
+"""
 import collections.abc
 import datetime as dt
 import mimetypes
@@ -101,6 +122,10 @@ ReplacementsFunc = collections.abc.Callable[[StrAnyDict], StrStrDict]
 def main(scriptname: str, cmdname: str, configfile: str):
+    investigate_jsonlog("/c/temp/irgendwas3.json")
+    return
    with open(configfile, 'rt') as f:
        config = yaml.load(f, Loader=yaml.Loader)
    site_url: str = find_value_or_help(config, 'site_url')
@@ -246,8 +271,8 @@ def upload_attachment(config: StrAnyDict, filepath: str):
    See c:/temp/a.json log.entries[164].
    """
    base_url = f"{config['site_url']}/{ASSIGNMENTS_CREATION_ATTACHMENT_UPLOAD_PATH}"
-    query_args = dict(special='upload', panel='Main', sakai_action='doAttachupload',
+    query_args = dict(special='upload', sakai_action='doAttachupload',  # panel='Main', ?
-                      csrf_token=config[CSRF_TOKEN_CONFIGENTRY_NAME])
+                      sakai_csrf_token=config[CSRF_TOKEN_CONFIGENTRY_NAME])
    # TODO 1: continue here:
    # We next need to construct the multipart body of the POST request.
    # See "POST Multiple Multipart-Encoded Files" in https://requests.readthedocs.io/en/latest/user/advanced/
@@ -258,6 +283,7 @@ def upload_attachment(config: StrAnyDict, filepath: str):
    # Das JSON ist dafür unpraktisch, weil der ganze multipart-Rumpf nur als Text dasteht.
    # In der Word-Datei findet sich die aufgeschlüsselte Form, die der Browser dafür anzeigt.
    # Erzeugung eines solchen Requests, siehe https://stackoverflow.com/a/12385661
+    headers = dict(Referer=f"{config['site_url']}/{ASSIGNMENTS_CREATION_ATTACHMENT_UPLOAD_PATH}")
    filename = os.path.basename(filepath)
    content_type, encoding = mimetypes.guess_type(filename)
    multipart_body = dict(
@@ -273,7 +299,54 @@ def upload_attachment(config: StrAnyDict, filepath: str):
        sakai_csrf_token=config[CSRF_TOKEN_CONFIGENTRY_NAME]
    )
    multipart_body['from'] = 'list'  # separate because 'from' is a keyword
-    requests.post(...)
+    resp = requests.post(base_url, 
+                         params=query_args,
+                         headers=headers,
+                         files=multipart_body, 
+                         cookies={SESSIONCOOKIE_NAME: config['cookies'][SESSIONCOOKIE_NAME]})
+    print(resp.text)
+def investigate_jsonlog(jsonfile: str):
+    """
+    Helper routine for reverse engineering:
+    We record a complete RQmain-posting episode with two attachments in Firefox
+    (F12->Network; cogwheel menu->Persist Logs; perform episode; SaveAs:irgendwas.json).
+    We pull out parts needed for understanding here.
+    """
+    import json
+    import dictns
+    testdata = dictns.Namespace(dict(
+        title="irgendwas3",
+        a_id='dbb411b5-195d-4089-ae66-6e9c8f01b81c',
+        instructions="irgendwelche Anleitung",
+        attachments=["ajeunwuk", "akeunwuk"],
+        group="Übung 01",  # name prefix only
+    ))
+    with open(jsonfile, 'r') as f:
+        log = json.load(f)
+    ns = dictns.Namespace(log)
+    # the first RQmain POST request is #111, response is 302 to panel=Main
+    firstrequest = ns.log.entries[111]
+    assert firstrequest.request.postData.params[5].value == testdata.title
+    # ns.log.entries[112] is another 302 to ASSIGNMENTS_CREATION_ATTACHMENT_UPLOAD_PATH
+    # popuprequest = ns.log.entries[113]
+    url = urllib.parse.urlparse("http://localhost")
+    for i in range(len(ns.log.entries)):  # range(111, 178+1):
+        entry = ns.log.entries[i]
+        if "portal/site" not in entry.request.url:
+            continue
+        last_url = url
+        url = urllib.parse.urlparse(entry.request.url)
+        if url == last_url:
+            msg = "(ditto)"
+        elif url.path == last_url.path:
+            msg = url.query
+        else:
+            msg = entry.request.url
+        print(i, entry.response.status, msg)
+    # print(popuprequest.response.content.text)
 def site_id(site_url: str) -> str:
@@ -287,6 +360,8 @@ def slurp(filename: str) -> bytes:
    with open(filename, 'rb') as f:
        return f.read()
 if __name__ == '__main__':
    if len(sys.argv) != 2+1 or sys.argv[1] != 'create_multigroup_assgmt':
        print(usage)